1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
19 #ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
22 #error AMD64 must be !LEGACY_BACKEND
26 #error ARM64 must be !LEGACY_BACKEND
32 #ifndef JIT32_GCENCODER
33 #include "gcinfoencoder.h"
36 /*****************************************************************************
38 * Determine what variables die between beforeSet and afterSet, and
39 * update the liveness globals accordingly:
40 * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
43 void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
// NOTE(review): this view of the function is elided (interior lines missing);
// comments below describe only what the visible lines establish.
// deadSet = variables live in beforeSet but not in afterSet, i.e. the ones dying here.
48 VARSET_TP deadSet(VarSetOps::Diff(compiler, beforeSet, afterSet));
// Nothing dies -> nothing to update.
50 if (VarSetOps::IsEmpty(compiler, deadSet))
53 /* iterate through the dead variables */
55 VarSetOps::Iter iter(compiler, deadSet);
56 unsigned varIndex = 0;
57 while (iter.NextElem(&varIndex))
// Map the tracked-variable index back to the lclVar number/descriptor.
59 varNum = compiler->lvaTrackedToVarNum[varIndex];
60 varDsc = compiler->lvaTable + varNum;
62 /* Remove this variable from the 'deadSet' bit set */
// A dying variable must currently be live.
64 noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
66 VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
// If it is a tracked stack GC pointer local, it must be in the current GC var set.
68 noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
69 VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
71 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
73 /* We are done if the variable is not enregistered */
75 if (!varDsc->lvRegister)
78 if (compiler->verbose)
80 printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
86 #if !FEATURE_FP_REGALLOC
87 // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
88 if (!varDsc->IsFloatRegType())
91 /* Get hold of the appropriate register bit(s) */
93 if (varTypeIsFloating(varDsc->TypeGet()))
95 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
99 regBit = genRegMask(varDsc->lvRegNum);
// For a register pair (e.g. a long on 32-bit), include the second register too.
100 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
101 regBit |= genRegMask(varDsc->lvOtherReg);
105 if (compiler->verbose)
107 printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
108 compiler->compRegVarName(varDsc->lvRegNum));
// The variable's register(s) must currently be in the register-variable mask.
111 noway_assert((regSet.rsMaskVars & regBit) != 0);
113 regSet.RemoveMaskVars(regBit);
115 // Remove GC tracking if any for this register
117 if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
118 gcInfo.gcMarkRegSetNpt(regBit);
123 /*****************************************************************************
125 * Change the given enregistered local variable node to a register variable node
// Rewrite an enregistered GT_LCL_VAR node in place into a GT_REG_VAR node,
// recording the variable's assigned register(s) on the node.
// NOTE(review): elided view — interior lines (braces/else arms) are missing.
128 void CodeGenInterface::genBashLclVar(GenTree* tree, unsigned varNum, LclVarDsc* varDsc)
130 noway_assert(tree->gtOper == GT_LCL_VAR);
131 noway_assert(varDsc->lvRegister);
133 if (isRegPairType(varDsc->lvType))
135 /* Check for the case of a variable that was narrowed to an int */
137 if (isRegPairType(tree->gtType))
// Pair-typed use of a pair-typed variable: mark the node with both registers.
139 genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
// Otherwise the appearance must be a cast-narrowed TYP_INT use of the pair.
143 noway_assert(tree->gtFlags & GTF_VAR_CAST);
144 noway_assert(tree->gtType == TYP_INT);
148 noway_assert(!isRegPairType(tree->gtType));
151 /* It's a register variable -- modify the node */
// Preserve the liveness bits across SetOper(), which clears flags.
153 unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
155 ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
156 tree->SetOper(GT_REG_VAR);
157 tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
159 tree->gtFlags |= livenessFlags;
161 tree->gtRegNum = varDsc->lvRegNum;
162 tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
163 tree->gtRegVar.SetLclNum(varNum);
// Snapshot the current liveness state (live-var set, GC var set, and the
// three register masks) into 'ls' so it can later be restored or checked.
167 void CodeGen::saveLiveness(genLivenessSet* ls)
169 VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
170 VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
171 ls->maskVars = (regMaskSmall)regSet.rsMaskVars;
172 ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
173 ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
// Restore liveness state previously captured by saveLiveness(); the inverse
// of that function, field for field.
177 void CodeGen::restoreLiveness(genLivenessSet* ls)
179 VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
180 VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
181 regSet.rsMaskVars = ls->maskVars;
182 gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
183 gcInfo.gcRegByrefSetCur = ls->byRefRegs;
// Debug check: assert that the current liveness state exactly matches a
// snapshot taken earlier by saveLiveness(). No state is modified.
187 void CodeGen::checkLiveness(genLivenessSet* ls)
189 assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
190 assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
191 assert(regSet.rsMaskVars == ls->maskVars);
192 assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
193 assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
// If the GT_LCL_VAR 'tree' refers to an enregistered variable, bash it into a
// GT_REG_VAR node (via genBashLclVar). Also retypes byref appearances of
// TYP_I_IMPL locals back to TYP_I_IMPL.
// NOTE(review): elided view — the return statements are among the missing lines.
197 bool CodeGenInterface::genMarkLclVar(GenTree* tree)
202 assert(tree->gtOper == GT_LCL_VAR);
204 /* Does the variable live in a register? */
206 varNum = tree->gtLclVarCommon.gtLclNum;
207 assert(varNum < compiler->lvaCount);
208 varDsc = compiler->lvaTable + varNum;
210 // Retype byref-typed appearances of intptr-typed lclVars as type intptr.
211 if ((varDsc->TypeGet() == TYP_I_IMPL) && (tree->TypeGet() == TYP_BYREF))
213 tree->gtType = TYP_I_IMPL;
216 if (varDsc->lvRegister)
218 genBashLclVar(tree, varNum, varDsc);
// Ask genCreateAddrMode (in analysis-only mode, no codegen) whether 'tree'
// forms an addressing mode, and — presumably — return its base component.
// NOTE(review): heavily elided view; several arguments to genCreateAddrMode
// and the return statements are missing here — confirm against the full file.
228 GenTree* CodeGen::genGetAddrModeBase(GenTree* tree)
236 if (genCreateAddrMode(tree, // address
239 RBM_NONE, // reg mask
243 #if SCALED_ADDR_MODES
246 &cns, // displacement
247 true)) // don't generate code
253 #if FEATURE_STACK_FP_X87
// Simple accessors/mutators for the x87 FP-stack depth tracked in genFPstkLevel.
// Reset the tracked x87 FP-stack depth (defaults to 0).
255 void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
257 genFPstkLevel = newValue;
// Return the current tracked x87 FP-stack depth.
261 unsigned CodeGenInterface::genGetFPstkLevel()
263 return genFPstkLevel;
// Bump the depth by 'inc'; the assert guards against unsigned overflow wraparound.
267 void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
269 noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
270 genFPstkLevel += inc;
// Lower the depth by 'dec'; the assert guards against unsigned underflow wraparound.
274 void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
276 noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
277 genFPstkLevel -= dec;
280 #endif // FEATURE_STACK_FP_X87
282 /*****************************************************************************
284 * Generate code that will set the given register to the integer constant.
// Emit code to load the integer constant 'val' into 'reg', using register
// tracking to avoid redundant loads and target-specific tricks (lea on x86,
// add/sub-from-a-close-register on ARM, push/pop for SMALL_CODE) to shrink
// the encoding. Updates regTracker and GC info for the destination register.
// NOTE(review): elided view — several #ifdef arms and braces are missing.
287 void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
// A TYP_REF register may only be set to null this way.
289 noway_assert(type != TYP_REF || val == NULL);
291 /* Does the reg already hold this constant? */
293 if (!regTracker.rsIconIsInReg(val, reg))
297 instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
300 // If we can set a register to a constant with a small encoding, then do that.
301 else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
303 instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
308 /* See if a register holds the value or a close value? */
309 bool constantLoaded = false;
311 regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
313 if (srcReg != REG_NA)
// Exact match found in srcReg: just copy it.
317 inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
318 constantLoaded = true;
322 #if defined(_TARGET_XARCH_)
323 /* delta should fit inside a byte */
324 if (delta == (signed char)delta)
326 /* use an lea instruction to set reg */
// lea reg, [srcReg + delta] materializes the constant without touching flags.
327 getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
328 constantLoaded = true;
330 #elif defined(_TARGET_ARM_)
331 /* We found a register 'regS' that has the value we need, modulo a small delta.
332 That is, the value we need is 'regS + delta'.
333 We want to generate one of the following instructions, listed in order of preference:
335 adds regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
337 subs regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
339 adds regD, regS, delta ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
340 subs regD, regS, delta ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
341 mov regD, icon ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
342 movw regD, icon ; 4 bytes. 0<=icon<=65535
343 add.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
344 sub.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
345 addw regD, regS, delta ; 4 bytes. 0<=delta<=4095
346 subw regD, regS, delta ; 4 bytes. -4095<=delta<=0
348 If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
349 than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
350 can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
351 0x80000000. In this case, delta will be 1.
355 regMaskTP regMask = genRegMask(reg);
356 regMaskTP srcRegMask = genRegMask(srcReg);
// 2-byte adds/subs with an 8-bit immediate: same low register, flags allowed.
358 if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
359 (unsigned_abs(delta) <= 255))
// 2-byte three-operand adds/subs: both low registers, |delta| <= 7.
363 else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
364 (unsigned_abs(delta) <= 7))
368 else if (arm_Valid_Imm_For_Mov(val))
370 // fall through to general "!constantLoaded" case below
372 else if (arm_Valid_Imm_For_Add(delta, flags))
379 getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
380 constantLoaded = true;
383 assert(!"Codegen missing");
388 if (!constantLoaded) // Have we loaded it yet?
393 /* or reg,-1 takes 3 bytes */
394 inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
397 /* For SMALL_CODE it is smaller to push a small immediate and
398 then pop it into the dest register */
399 if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
401 /* "mov" has no s(sign)-bit and so always takes 6 bytes,
402 whereas push+pop takes 2+1 bytes */
404 inst_IV(INS_push, val);
407 inst_RV(INS_pop, reg, type);
411 #endif // _TARGET_X86_
// General case: materialize the immediate directly.
413 instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
// Record the constant now held in 'reg' and update GC tracking for it.
418 regTracker.rsTrackRegIntCns(reg, val);
419 gcInfo.gcMarkRegPtrVal(reg, type);
422 /*****************************************************************************
424 * Find an existing register set to the given integer constant, or
425 * pick a register and generate code that will set it to the integer constant.
427 * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
428 * to pick some register to set. NOTE that this means the returned regNumber
429 * might *not* be in regBest. It also implies that you should lock any registers
430 * you don't want spilled (not just mark as used).
// Return a register known to hold 'val', reusing an existing one if the
// tracker has it, otherwise picking a register (preferring regBest, avoiding
// RBM_TMP_0) and emitting code to load the constant into it.
// NOTE(review): elided view — the return statement is among the missing lines.
434 regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
439 // Is there already a register with zero that we can use?
440 regCns = regTracker.rsIconIsInReg(val);
442 if (regCns == REG_NA)
445 // If not, grab a register to hold the constant, preferring
446 // any register besides RBM_TMP_0 so it can hopefully be re-used
447 regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
449 // Now set the constant
450 genSetRegToIcon(regCns, val, type);
// Per the function header: rsPickReg only *prefers* regBest.
453 // NOTE: there is no guarantee that regCns is in regBest's mask
457 /*****************************************************************************/
458 /*****************************************************************************
460 * Add the given constant to the specified register.
461 * 'tree' is the resulting tree
// Add the constant 'ival' to 'reg', preferring inc/dec encodings on xarch
// when overflow checking is off and inc/dec is not penalized; then update
// register tracking, liveness, and GC info for the result.
// NOTE(review): elided view — the conditions selecting inc vs. dec and the
// UPDATE_LIVENESS label itself are among the missing lines.
464 void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTree* tree, var_types dstType, bool ovfl)
466 bool setFlags = (tree != NULL) && tree->gtSetFlags();
468 #ifdef _TARGET_XARCH_
469 /* First check to see if we can generate inc or dec instruction(s) */
470 /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
471 if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
473 emitAttr size = emitTypeSize(dstType);
478 inst_RV(INS_inc, reg, dstType, size);
481 inst_RV(INS_inc, reg, dstType, size);
483 goto UPDATE_LIVENESS;
486 inst_RV(INS_dec, reg, dstType, size);
489 inst_RV(INS_dec, reg, dstType, size);
491 goto UPDATE_LIVENESS;
// General case: a plain add-immediate, setting flags only if the tree needs them.
496 insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
497 inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
500 #ifdef _TARGET_XARCH_
505 genFlagsEqualToReg(tree, reg);
// The register no longer holds whatever constant/copy it held before.
507 regTracker.rsTrackRegTrash(reg);
509 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
513 if (!tree->OperIsAssignment())
515 genMarkTreeInReg(tree, reg);
// A GC-typed add result is a byref (interior pointer), not a plain ref.
516 if (varTypeIsGC(tree->TypeGet()))
517 gcInfo.gcMarkRegSetByref(genRegMask(reg));
522 /*****************************************************************************
524 * Subtract the given constant from the specified register.
525 * Should only be used for unsigned sub with overflow. Else
526 * genIncRegBy() can be used using -ival. We shouldn't use genIncRegBy()
527 * for these cases as the flags are set differently, and the following
528 * check for overflow won't work correctly.
529 * 'tree' is the resulting tree.
// Subtract 'ival' from 'reg' for an overflow-checked subtraction. Per the
// banner above: only for unsigned overflow sub (or the INT_MIN/SSIZE_T_MIN
// special case) — genIncRegBy(-ival) would set the flags differently and
// break the subsequent overflow check.
532 void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTree* tree)
534 noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
535 ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
536 noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
538 regTracker.rsTrackRegTrash(reg);
// GC-typed values are not allowed here, so clear any pointer tracking.
540 noway_assert(!varTypeIsGC(tree->TypeGet()));
541 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
543 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
544 inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
// Record that the condition flags now reflect 'reg', if the tree asked for flags.
546 if (tree->gtSetFlags())
547 genFlagsEqualToReg(tree, reg);
551 genMarkTreeInReg(tree, reg);
555 /*****************************************************************************
557 * Multiply the specified register by the given value.
558 * 'tree' is the resulting tree
// Multiply 'reg' by the constant 'ival'. A non-overflow multiply by a
// positive power of two becomes a left shift; otherwise (on xarch) a
// three-operand imul-by-immediate is used.
// NOTE(review): elided view — braces and some branch structure are missing.
561 void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTree* tree, var_types dstType, bool ovfl)
563 noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
565 regTracker.rsTrackRegTrash(reg);
569 genMarkTreeInReg(tree, reg);
572 bool use_shift = false;
573 unsigned shift_by = 0;
// (ival & (ival - 1)) == 0 with ival > 0 <=> ival is a power of two.
575 if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
578 BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
585 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
586 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
587 if (tree->gtSetFlags())
588 genFlagsEqualToReg(tree, reg);
594 #ifdef _TARGET_XARCH_
595 ins = getEmitter()->inst3opImulForReg(reg);
600 inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
604 /*****************************************************************************/
605 /*****************************************************************************/
606 /*****************************************************************************
608 * Compute the value 'tree' into a register that's in 'needReg'
609 * (or any free register if 'needReg' is RBM_NONE).
611 * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
612 * If keepReg==RegSet::KEEP_REG, we mark the register as being used.
614 * If you require that the register returned is trashable, pass true for 'freeOnly'.
// Compute 'tree' into a register constrained by needReg/mustReg (see the
// banner above). After genCodeForTree, fixes up the result register if the
// caller demanded an exact register or a free (trashable) one, moving or
// spilling as needed, then optionally marks the result register as used.
// NOTE(review): elided view — braces, some gotos/labels, and fallthrough
// structure are missing; comments reflect only the visible lines.
617 void CodeGen::genComputeReg(
618 GenTree* tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
620 noway_assert(tree->gtType != TYP_VOID);
// Per-target restrictions on what types this path may compute into a register.
625 #if FEATURE_STACK_FP_X87
626 noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
627 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
628 #elif defined(_TARGET_ARM_)
629 noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
630 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
631 genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
632 genActualType(tree->gtType) == TYP_STRUCT);
634 noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
635 genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
636 genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
639 /* Generate the value, hopefully into the right register */
641 genCodeForTree(tree, needReg);
642 noway_assert(tree->InReg());
644 // There is a workaround in genCodeForTreeLng() that changes the type of the
645 // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
646 // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
647 // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
648 // running the rest of this code, because anything looking at gtRegNum on ARM or
649 // attempting to move from EAX/EDX will be wrong.
650 if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
653 reg = tree->gtRegNum;
655 /* Did the value end up in an acceptable register? */
657 if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
659 /* Not good enough to satisfy the caller's orders */
661 if (varTypeIsFloating(tree))
663 RegSet::RegisterPreference pref(needReg, RBM_NONE);
664 rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
668 rg2 = regSet.rsGrabReg(needReg);
673 /* Do we have to end up with a free register? */
678 /* Did we luck out and the value got computed into an unused reg? */
680 if (genRegMask(reg) & regSet.rsRegMaskFree())
683 /* Register already in use, so spill previous value */
685 if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
// rsGrabReg spills the current occupant of the needed register.
687 rg2 = regSet.rsGrabReg(needReg);
690 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
691 tree->gtRegNum = reg;
697 /* OK, let's find a trashable home for the value */
699 regMaskTP rv1RegUsed;
// Lock the current register so rsPickReg cannot hand it back or spill it.
701 regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
702 rg2 = regSet.rsPickReg(needReg);
703 regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
707 noway_assert(reg != rg2);
709 /* Update the value in the target register */
711 regTracker.rsTrackRegCopy(rg2, reg);
713 inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
715 /* The value has been transferred to 'reg' */
// Old register no longer holds a live GC pointer unless it is still in use.
717 if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
718 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
720 gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
722 /* The value is now in an appropriate register */
724 tree->gtRegNum = rg2;
728 /* Does the caller want us to mark the register as used? */
730 if (keepReg == RegSet::KEEP_REG)
732 /* In case we're computing a value into a register variable */
736 /* Mark the register as 'used' */
738 regSet.rsMarkRegUsed(tree);
742 /*****************************************************************************
744 * Same as genComputeReg(), the only difference being that the result is
745 * guaranteed to end up in a trashable register.
// Convenience wrapper: genComputeReg with freeOnly=true, guaranteeing the
// result lands in a trashable register (see the banner above).
749 void CodeGen::genCompIntoFreeReg(GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg)
751 genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
754 /*****************************************************************************
756 * The value 'tree' was earlier computed into a register; free up that
757 * register (but also make sure the value is presently in a register).
// Free the register holding 'tree', first reloading the value if it was
// spilled (rsUnspillReg with FREE_REG both reloads and frees).
760 void CodeGen::genReleaseReg(GenTree* tree)
762 if (tree->gtFlags & GTF_SPILLED)
764 /* The register has been spilled -- reload it */
766 regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
// Not spilled: simply drop the register from the used mask.
770 regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
773 /*****************************************************************************
775 * The value 'tree' was earlier computed into a register. Check whether that
776 * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
777 * free the register. The caller shouldn't need to be setting GCness of the register
778 * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
// Make sure the previously computed value of 'tree' is in an acceptable
// register: reload if spilled, move if it is not in 'needReg', and free the
// register when keepReg is FREE_REG (disallowed for GC-typed trees — see banner).
// NOTE(review): elided view — braces/else structure are missing.
781 void CodeGen::genRecoverReg(GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg)
783 if (tree->gtFlags & GTF_SPILLED)
785 /* The register has been spilled -- reload it */
787 regSet.rsUnspillReg(tree, needReg, keepReg);
790 else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
792 /* We need the tree in another register. So move it there */
794 noway_assert(tree->InReg());
795 regNumber oldReg = tree->gtRegNum;
797 /* Pick an acceptable register */
799 regNumber reg = regSet.rsGrabReg(needReg);
803 inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
804 tree->gtRegNum = reg;
// Transfer GC tracking and the used/free state from oldReg to the new reg.
806 gcInfo.gcMarkRegPtrVal(tree);
807 regSet.rsMarkRegUsed(tree);
808 regSet.rsMarkRegFree(oldReg, tree);
810 regTracker.rsTrackRegCopy(reg, oldReg);
813 /* Free the register if the caller desired so */
815 if (keepReg == RegSet::FREE_REG)
817 regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
818 // Can't use RegSet::FREE_REG on a GC type
819 noway_assert(!varTypeIsGC(tree->gtType));
// Otherwise the register must remain in the used mask.
823 noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
827 /*****************************************************************************
829 * Move one half of a register pair to its new regPair(half).
// Move one 32-bit half of a long value into 'dst'. If the source half lives
// on the stack (src == REG_STK, per the visible load path) the value is
// loaded from the lclVar's home at byte offset 'off'; otherwise it is a
// simple register-to-register move.
// NOTE(review): elided view — the src==REG_STK test itself is among the
// missing lines; confirm against the full file.
833 void CodeGen::genMoveRegPairHalf(GenTree* tree, regNumber dst, regNumber src, int off)
837 // handle long to unsigned long overflow casts
838 while (tree->gtOper == GT_CAST)
840 noway_assert(tree->gtType == TYP_LONG);
841 tree = tree->gtCast.CastOp();
843 noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
844 noway_assert(tree->gtType == TYP_LONG);
845 inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
846 regTracker.rsTrackRegTrash(dst);
850 regTracker.rsTrackRegCopy(dst, src);
851 inst_RV_RV(INS_mov, dst, src, TYP_INT);
855 /*****************************************************************************
857 * The given long value is in a register pair, but it's not an acceptable
858 * one. We have to move the value into a register pair in 'needReg' (if
859 * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
861 * Important note: if 'needReg' is non-zero, we assume the current pair
862 * has not been marked as free. If, OTOH, 'newPair' is specified, we
863 * assume that the current register pair is marked as used and free it.
// Move the long value of 'tree' from its current register pair into either
// some pair in 'needReg' or the exact pair 'newPair' (exactly one of the two
// is specified — see the banner above for the used/free preconditions).
// Handles overlap between the old and new pairs, including the full-swap case.
// NOTE(review): elided view — braces and several else/condition lines are missing.
866 void CodeGen::genMoveRegPair(GenTree* tree, regMaskTP needReg, regPairNo newPair)
875 /* Either a target set or a specific pair may be requested */
877 noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
879 /* Get hold of the current pair */
881 oldPair = tree->gtRegPair;
882 noway_assert(oldPair != newPair);
884 /* Are we supposed to move to a specific pair? */
886 if (newPair != REG_PAIR_NONE)
888 regMaskTP oldMask = genRegPairMask(oldPair);
889 regMaskTP loMask = genRegMask(genRegPairLo(newPair));
890 regMaskTP hiMask = genRegMask(genRegPairHi(newPair));
891 regMaskTP overlap = oldMask & (loMask | hiMask);
893 /* First lock any registers that are in both pairs */
// Locking the overlap keeps rsGrabReg below from spilling our own halves.
895 noway_assert((regSet.rsMaskUsed & overlap) == overlap);
896 noway_assert((regSet.rsMaskLock & overlap) == 0);
897 regSet.rsMaskLock |= overlap;
899 /* Make sure any additional registers we need are free */
901 if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
903 regSet.rsGrabReg(loMask);
906 if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
908 regSet.rsGrabReg(hiMask);
911 /* Unlock those registers we have temporarily locked */
913 noway_assert((regSet.rsMaskUsed & overlap) == overlap);
914 noway_assert((regSet.rsMaskLock & overlap) == overlap);
915 regSet.rsMaskLock -= overlap;
917 /* We can now free the old pair */
919 regSet.rsMarkRegFree(oldMask);
923 /* Pick the new pair based on the caller's stated preference */
925 newPair = regSet.rsGrabRegPair(needReg);
928 // If grabbed pair is the same as old one we're done
929 if (newPair == oldPair)
// Comma-expression inside the assert so the locals are only set in debug.
931 noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
932 newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
936 /* Move the values from the old pair into the new one */
938 oldLo = genRegPairLo(oldPair);
939 oldHi = genRegPairHi(oldPair);
940 newLo = genRegPairLo(newPair);
941 newHi = genRegPairHi(newPair);
943 noway_assert(newLo != REG_STK && newHi != REG_STK);
945 /* Careful - the register pairs might overlap */
949 /* The low registers are identical, just move the upper half */
951 noway_assert(newHi != oldHi);
952 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
956 /* The low registers are different, are the upper ones the same? */
960 /* Just move the lower half, then */
961 genMoveRegPairHalf(tree, newLo, oldLo, 0);
965 /* Both sets are different - is there an overlap? */
969 /* Are high and low simply swapped ? */
// ARM path: three XORs swap without a scratch register; x86 path uses xchg.
974 /* Let's use XOR swap to reduce register pressure. */
975 inst_RV_RV(INS_eor, oldLo, oldHi);
976 inst_RV_RV(INS_eor, oldHi, oldLo);
977 inst_RV_RV(INS_eor, oldLo, oldHi);
979 inst_RV_RV(INS_xchg, oldHi, oldLo);
981 regTracker.rsTrackRegSwap(oldHi, oldLo);
985 /* New lower == old higher, so move higher half first */
987 noway_assert(newHi != oldLo);
988 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
989 genMoveRegPairHalf(tree, newLo, oldLo, 0);
994 /* Move lower half first */
995 genMoveRegPairHalf(tree, newLo, oldLo, 0);
996 genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
1001 /* Record the fact that we're switching to another pair */
1003 tree->gtRegPair = newPair;
1006 /*****************************************************************************
1008 * Compute the value 'tree' into the register pair specified by 'needRegPair'
1009 * if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid
1010 * those in avoidReg.
1011 * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
1012 * value ends up in as being used.
// Compute the long value 'tree' into a register pair: the exact pair
// 'needRegPair', or (when REG_PAIR_NONE) any suitable pair avoiding
// 'avoidReg'. Fixes up a partially-stack-resident result (rLo/rHi == REG_STK)
// by loading the stack half into a real register, and optionally marks the
// final pair as used (see banner above).
// NOTE(review): elided view — braces, several else branches, and the
// rLo==REG_STK test are among the missing lines.
1015 void CodeGen::genComputeRegPair(
1016 GenTree* tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
1021 regMaskTP tmpUsedMask;
1025 noway_assert(isRegPairType(tree->gtType));
1027 if (needRegPair == REG_PAIR_NONE)
// Build a candidate mask; genMaxOneBit checks imply a pair needs >= 2 registers.
1031 regMask = regSet.rsRegMaskFree() & ~avoidReg;
1032 if (genMaxOneBit(regMask))
1033 regMask = regSet.rsRegMaskFree();
1037 regMask = RBM_ALLINT & ~avoidReg;
1040 if (genMaxOneBit(regMask))
1041 regMask = regSet.rsRegMaskCanGrab();
// Exact pair requested: constrain codegen to exactly those two registers.
1045 regMask = genRegPairMask(needRegPair);
1048 /* Generate the value, hopefully into the right register pair */
1050 genCodeForTreeLng(tree, regMask, avoidReg);
1052 noway_assert(tree->InReg());
1054 regPair = tree->gtRegPair;
1055 tmpMask = genRegPairMask(regPair);
1057 rLo = genRegPairLo(regPair);
1058 rHi = genRegPairHi(regPair);
1060 /* At least one half is in a real register */
1062 noway_assert(rLo != REG_STK || rHi != REG_STK);
1064 /* Did the value end up in an acceptable register pair? */
1066 if (needRegPair != REG_PAIR_NONE)
1068 if (needRegPair != regPair)
1070 /* This is a workaround. If we specify a regPair for genMoveRegPair */
1071 /* it expects the source pair being marked as used */
1072 regSet.rsMarkRegPairUsed(tree);
1073 genMoveRegPair(tree, 0, needRegPair);
1078 /* Do we have to end up with a free register pair?
1079 Something might have gotten freed up above */
1080 bool mustMoveReg = false;
1082 regMask = regSet.rsRegMaskFree() & ~avoidReg;
1084 if (genMaxOneBit(regMask))
1085 regMask = regSet.rsRegMaskFree();
1087 if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
1089 /* Note that we must call genMoveRegPair if one of our registers
1090 comes from the used mask, so that it will be properly spilled. */
// Widen the candidate mask until it can hold a full pair.
1095 if (genMaxOneBit(regMask))
1096 regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
1098 if (genMaxOneBit(regMask))
1099 regMask |= regSet.rsRegMaskCanGrab();
1101 /* Did the value end up in a free register pair? */
1105 /* We'll have to move the value to a free (trashable) pair */
1106 genMoveRegPair(tree, regMask, REG_PAIR_NONE);
// From here on: one half is REG_STK and must be loaded into a real register.
1111 noway_assert(needRegPair == REG_PAIR_NONE);
1112 noway_assert(!freeOnly);
1114 /* it is possible to have tmpMask also in the regSet.rsMaskUsed */
1115 tmpUsedMask = tmpMask & regSet.rsMaskUsed;
1116 tmpMask &= ~regSet.rsMaskUsed;
1118 /* Make sure that the value is in "real" registers*/
1121 /* Get one of the desired registers, but exclude rHi */
// Lock the surviving half so rsPickReg cannot steal or spill it.
1123 regSet.rsLockReg(tmpMask);
1124 regSet.rsLockUsedReg(tmpUsedMask);
1126 regNumber reg = regSet.rsPickReg(regMask);
1128 regSet.rsUnlockUsedReg(tmpUsedMask);
1129 regSet.rsUnlockReg(tmpMask);
// Load the low 32 bits (offset 0) from the variable's stack home.
1131 inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
1133 tree->gtRegPair = gen2regs2pair(reg, rHi);
1135 regTracker.rsTrackRegTrash(reg);
1136 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1138 else if (rHi == REG_STK)
1140 /* Get one of the desired registers, but exclude rLo */
1142 regSet.rsLockReg(tmpMask);
1143 regSet.rsLockUsedReg(tmpUsedMask);
1145 regNumber reg = regSet.rsPickReg(regMask);
1147 regSet.rsUnlockUsedReg(tmpUsedMask);
1148 regSet.rsUnlockReg(tmpMask);
// Load the high 32 bits (offset 4) from the variable's stack home.
1150 inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
1152 tree->gtRegPair = gen2regs2pair(rLo, reg);
1154 regTracker.rsTrackRegTrash(reg);
1155 gcInfo.gcMarkRegSetNpt(genRegMask(reg));
1159 /* Does the caller want us to mark the register as used? */
1161 if (keepReg == RegSet::KEEP_REG)
1163 /* In case we're computing a value into a register variable */
1165 genUpdateLife(tree);
1167 /* Mark the register as 'used' */
1169 regSet.rsMarkRegPairUsed(tree);
1173 /*****************************************************************************
1175 * Same as genComputeRegPair(), the only difference being that the result
1176 * is guaranteed to end up in a trashable register pair.
// Convenience wrapper: genComputeRegPair with freeOnly=true, guaranteeing the
// result lands in a trashable register pair (see the banner above).
1180 void CodeGen::genCompIntoFreeRegPair(GenTree* tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
1182 genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
1185 /*****************************************************************************
1187 * The value 'tree' was earlier computed into a register pair; free up that
1188 * register pair (but also make sure the value is presently in a register
// Free the register pair holding 'tree', first reloading it if it was
// spilled (rsUnspillRegPair with FREE_REG both reloads and frees).
1192 void CodeGen::genReleaseRegPair(GenTree* tree)
1194 if (tree->gtFlags & GTF_SPILLED)
1196 /* The register has been spilled -- reload it */
1198 regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
// Not spilled: simply drop both registers of the pair from the used mask.
1202 regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1205 /*****************************************************************************
1207 * The value 'tree' was earlier computed into a register pair. Check whether
1208 * either register of that pair has been spilled (and reload it if so), and
1209 * if 'keepReg' is 0, free the register pair.
// Recover a previously computed long value: reload spilled halves, move the
// pair if the caller insists on a specific 'regPair', and free the pair when
// keepReg is FREE_REG (see the banner above).
// NOTE(review): elided view — braces and else structure are missing.
1212 void CodeGen::genRecoverRegPair(GenTree* tree, regPairNo regPair, RegSet::KeepReg keepReg)
1214 if (tree->gtFlags & GTF_SPILLED)
// With no specific pair requested, any pair will do for the unspill.
1218 if (regPair == REG_PAIR_NONE)
1221 regMask = genRegPairMask(regPair);
1223 /* The register pair has been spilled -- reload it */
1225 regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
1228 /* Does the caller insist on the value being in a specific place? */
1230 if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
1232 /* No good -- we'll have to move the value to a new place */
1234 genMoveRegPair(tree, 0, regPair);
1236 /* Mark the pair as used if appropriate */
1238 if (keepReg == RegSet::KEEP_REG)
1239 regSet.rsMarkRegPairUsed(tree);
1244 /* Free the register pair if the caller desired so */
1246 if (keepReg == RegSet::FREE_REG)
1247 regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
1250 /*****************************************************************************
1252 * Compute the given long value into the specified register pair; don't mark
1253 * the register pair as used.
// Evaluate a long value into the requested register pair without leaving the
// pair marked as used: compute with KEEP_REG, then recover with FREE_REG,
// which also re-homes the value into 'regPair' if a spill displaced it.
1257 void CodeGen::genEvalIntoFreeRegPair(GenTree* tree, regPairNo regPair, regMaskTP avoidReg)
1259 genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
1260 genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
1263 /*****************************************************************************
1264 * This helper makes sure that the regpair target of an assignment is
1265 * available for use. This needs to be called in genCodeForTreeLng just before
1266 * a long assignment, but must not be called until everything has been
1267 * evaluated, or else we might try to spill enregistered variables.
// Spill whichever halves of 'regPair' are currently marked in use so the pair
// can serve as the target of a long store. Either half may be REG_STK (the
// value half lives on the stack), in which case there is nothing to spill.
1272 void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
1274 /* Make sure the target of the store is available */
1276 regNumber regLo = genRegPairLo(regPair);
1277 regNumber regHi = genRegPairHi(regPair);
1279 if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
1280 regSet.rsSpillReg(regHi);
1282 if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
1283 regSet.rsSpillReg(regLo);
1286 /*****************************************************************************/
1287 /*****************************************************************************
1289 * Return true if the given tree 'addr' can be computed via an addressing mode,
1290 * such as "[ebx+esi*4+20]". If the expression isn't an address mode already
1291 * try to make it so (but we don't try 'too hard' to accomplish this).
1293 * If we end up needing a register (or two registers) to hold some part(s) of the
1294 * address, we return the use register mask via '*useMaskPtr'.
1296 * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
1297 * in use. The caller would then be responsible for calling
1298 * regSet.rsMarkRegFree(*useMaskPtr).
1300 * If keepReg==RegSet::FREE_REG, then the caller needs update the GC-tracking by
1301 * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
// Try to form an addressing mode (base rv1 + scaled rv2 + constant cns) for
// 'addr', evaluating rv1/rv2 into registers as needed and honoring the tree's
// evaluation order ('rev'). On success *useMaskPtr receives the registers the
// address depends on; with KEEP_REG those registers are also marked in use.
// GT_ARR_ELEM addresses are delegated to genMakeAddrArrElem.
1304 bool CodeGen::genMakeIndAddrMode(GenTree* addr,
1308 RegSet::KeepReg keepReg,
1309 regMaskTP* useMaskPtr,
1312 if (addr->gtOper == GT_ARR_ELEM)
1314 regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
1322 bool operIsArrIndex; // is oper an array index
1323 GenTree* scaledIndex; // If scaled addressing mode can't be used
1325 regMaskTP anyMask = RBM_ALLINT;
1331 int ixv = INT_MAX; // unset value
1333 GenTree* scaledIndexVal;
1335 regMaskTP newLiveMask;
1339 /* Deferred address mode forming NYI for x86 */
1341 noway_assert(deferOK == false);
// 'oper' must be the indirection (or atomic op) that consumes 'addr', or NULL.
1343 noway_assert(oper == NULL ||
1344 ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
1345 ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
1346 operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
1348 if (addr->gtOper == GT_LEA)
1350 rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
1351 GenTreeAddrMode* lea = addr->AsAddrMode();
1355 cns = lea->gtOffset;
1357 if (rv1 != NULL && rv2 == NULL && cns == 0 && rv1->InReg())
1365 // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
1368 /* Is the complete address already sitting in a register? */
1370 if ((addr->InReg()) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
1372 genUpdateLife(addr);
1375 rv2 = scaledIndex = 0;
1381 /* Is it an absolute address */
1383 if (addr->IsCnsIntOrI())
1385 rv1 = rv2 = scaledIndex = 0;
1386 // along this code path cns is never used, so place a BOGUS value in it as proof
1387 // cns = addr->gtIntCon.gtIconVal;
1393 /* Is there a chance of forming an address mode? */
1395 if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
1397 /* This better not be an array index */
1398 noway_assert(!operIsArrIndex);
1402 // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
1405 /* For scaled array access, RV2 may not be pointing to the index of the
1406 array if the CPU does not support the needed scaling factor. We will
1407 make it point to the actual index, and scaledIndex will point to
1411 scaledIndexVal = NULL;
1413 if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
1414 rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
1417 compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
1419 noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
1422 /* Has the address already been computed? */
1432 genUpdateLife(addr);
1437 Here we have the following operands:
1439 rv1 ..... base address
1440 rv2 ..... offset value (or NULL)
1441 mul ..... multiplier for rv2 (or 0)
1442 cns ..... additional constant (or 0)
1444 The first operand must be present (and be an address) unless we're
1445 computing an expression via 'LEA'. The scaled operand is optional,
1446 but must not be a pointer if present.
1449 noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
1451 /*-------------------------------------------------------------------------
1453 * Make sure both rv1 and rv2 (if present) are in registers
1457 // Trivial case : Is either rv1 or rv2 a NULL ?
1461 /* A single operand, make sure it's in a register */
1465 // In the case where "rv1" is already in a register, there's no reason to get into a
1466 // register in "regMask" yet, if there's a non-zero constant that we're going to add;
1467 // if there is, we can do an LEA.
1468 genCodeForTree(rv1, RBM_NONE);
1472 genCodeForTree(rv1, regMask);
1478 /* A single (scaled) operand, make sure it's in a register */
1480 genCodeForTree(rv2, 0);
1484 /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
1485 they are in registers */
1487 noway_assert(rv1 && rv2);
1489 /* If we have to check a constant array index, compare it against
1490 the array dimension (see below) but then fold the index with a
1491 scaling factor (if any) and additional offset (if any).
1494 if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
1496 if (scaledIndex != NULL)
1498 assert(rv2 == scaledIndex && scaledIndexVal != NULL);
1499 rv2 = scaledIndexVal;
1501 /* We must have a range-checked index operation */
1503 noway_assert(operIsArrIndex);
1505 /* Get hold of the index value and see if it's a constant */
1507 if (rv2->IsIntCnsFitsInI32())
1509 ixv = (int)rv2->gtIntCon.gtIconVal;
1510 // Maybe I should just set "fold" true in the call to genMakeAddressable above.
1511 if (scaledIndex != NULL)
1513 int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
1524 rv2 = scaledIndex = NULL;
1526 /* Add the scaled index into the added value */
1533 /* Make sure 'rv1' is in a register */
1535 genCodeForTree(rv1, regMask);
1543 /* op1 already in register - how about op2? */
1547 /* Great - both operands are in registers already. Just update
1548 the liveness and we are done. */
1564 /* rv1 is in a register, but rv2 isn't */
1568 /* rv1 is already materialized in a register. Just update liveness
1569 to rv1 and generate code for rv2 */
1572 regSet.rsMarkRegUsed(rv1, oper);
1577 else if (rv2->InReg())
1579 /* rv2 is in a register, but rv1 isn't */
1581 noway_assert(rv2->gtOper == GT_REG_VAR);
1585 /* rv2 is already materialized in a register. Update liveness
1586 to after rv2 and then hang on to rv2 */
1589 regSet.rsMarkRegUsed(rv2, oper);
1592 /* Generate the code for the first operand */
1594 genCodeForTree(rv1, regMask);
1598 // Free up rv2 in the right fashion (it might be re-marked if keepReg)
// NOTE(review): the lock/unlock around rv1 protects its register while rv2 is
// being freed/re-marked; the intervening statements are order-sensitive.
1599 regSet.rsMarkRegUsed(rv1, oper);
1600 regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
1602 regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1607 /* We have evaluated rv1, and now we just need to update liveness
1608 to rv2 which was already in a register */
1619 /* Make sure we preserve the correct operand order */
1623 /* Generate the second operand first */
1625 // Determine what registers go live between rv2 and rv1
1626 newLiveMask = genNewLiveRegMask(rv2, rv1);
1628 rv2Mask = regMask & ~newLiveMask;
1629 rv2Mask &= ~rv1->gtRsvdRegs;
1631 if (rv2Mask == RBM_NONE)
1633 // The regMask hint cannot be honored
1634 // We probably have a call that trashes the register(s) in regMask
1635 // so ignore the regMask hint, but try to avoid using
1636 // the registers in newLiveMask and the rv1->gtRsvdRegs
1638 rv2Mask = RBM_ALLINT & ~newLiveMask;
1639 rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
1642 genCodeForTree(rv2, rv2Mask);
1643 regMask &= ~genRegMask(rv2->gtRegNum);
1645 regSet.rsMarkRegUsed(rv2, oper);
1647 /* Generate the first operand second */
1649 genCodeForTree(rv1, regMask);
1650 regSet.rsMarkRegUsed(rv1, oper);
1652 /* Free up both operands in the right order (they might be
1653 re-marked as used below)
1655 regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
1657 regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
1662 /* Get the first operand into a register */
1664 // Determine what registers go live between rv1 and rv2
1665 newLiveMask = genNewLiveRegMask(rv1, rv2);
1667 rv1Mask = regMask & ~newLiveMask;
1668 rv1Mask &= ~rv2->gtRsvdRegs;
1670 if (rv1Mask == RBM_NONE)
1672 // The regMask hint cannot be honored
1673 // We probably have a call that trashes the register(s) in regMask
1674 // so ignore the regMask hint, but try to avoid using
1675 // the registers in liveMask and the rv2->gtRsvdRegs
1677 rv1Mask = RBM_ALLINT & ~newLiveMask;
1678 rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
1681 genCodeForTree(rv1, rv1Mask);
1682 regSet.rsMarkRegUsed(rv1, oper);
1686 /* Here, we need to get rv2 in a register. We have either already
1687 materialized rv1 into a register, or it was already in a one */
1689 noway_assert(rv1->InReg());
1690 noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
1692 /* Generate the second operand as well */
1694 regMask &= ~genRegMask(rv1->gtRegNum);
1695 genCodeForTree(rv2, regMask);
1699 /* rev==true means the evaluation order is rv2,rv1. We just
1700 evaluated rv2, and rv1 was already in a register. Just
1701 update liveness to rv1 and we are done. */
1707 /* We have evaluated rv1 and rv2. Free up both operands in
1708 the right order (they might be re-marked as used below) */
1710 /* Even though we have not explicitly marked rv2 as used,
1711 rv2->gtRegNum may be used if rv2 is a multi-use or
1712 an enregistered variable. */
1714 regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
1716 /* Check for special case both rv1 and rv2 are the same register */
1717 if (rv2Used != genRegMask(rv1->gtRegNum))
1720 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1724 regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
1730 /*-------------------------------------------------------------------------
1732 * At this point, both rv1 and rv2 (if present) are in registers
1738 /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
1740 if (rv1 && !(rv1->InReg()))
1742 if (rv2 && !(rv2->InReg()))
1747 // *(intVar1+intVar1) causes problems as we
1748 // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
1749 // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
1750 // that currently as we return a single mask in useMaskPtr.
1752 if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && rv1->InReg() && rv2->InReg())
1754 if (rv1->gtRegNum == rv2->gtRegNum)
1756 noway_assert(!operIsArrIndex);
1761 /* Check either register operand to see if it needs to be saved */
1765 noway_assert(rv1->InReg());
1767 if (keepReg == RegSet::KEEP_REG)
1769 regSet.rsMarkRegUsed(rv1, oper);
1773 /* If the register holds an address, mark it */
1775 gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
1781 noway_assert(rv2->InReg());
1783 if (keepReg == RegSet::KEEP_REG)
1784 regSet.rsMarkRegUsed(rv2, oper);
1789 noway_assert(!scaledIndex);
1793 /* Compute the set of registers the address depends on */
1795 regMaskTP useMask = RBM_NONE;
1799 if (rv1->gtFlags & GTF_SPILLED)
1800 regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
1802 noway_assert(rv1->InReg());
1803 useMask |= genRegMask(rv1->gtRegNum);
// Unspilling rv2 must not displace rv1's register, so rv1's register is
// locked around the rv2 unspill.
1808 if (rv2->gtFlags & GTF_SPILLED)
1812 regMaskTP lregMask = genRegMask(rv1->gtRegNum);
1815 regSet.rsLockReg(lregMask, &used);
1816 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1817 regSet.rsUnlockReg(lregMask, used);
1820 regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
1822 noway_assert(rv2->InReg());
1823 useMask |= genRegMask(rv2->gtRegNum);
1826 /* Tell the caller which registers we need to hang on to */
1828 *useMaskPtr = useMask;
1833 /*****************************************************************************
1835 * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
// Emit an array bounds check for a GT_ARR_BOUNDS_CHECK node: compare the index
// against the array length and jump to the range-check-fail throw block on an
// unsigned greater-or-equal (covers negative indices too). Handles the four
// index/length constant-vs-register combinations separately.
1838 void CodeGen::genRangeCheck(GenTree* oper)
1840 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
1841 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
1843 GenTree* arrLen = bndsChk->gtArrLen;
1844 GenTree* arrRef = NULL;
1847 /* Is the array index a constant value? */
1848 GenTree* index = bndsChk->gtIndex;
1849 if (!index->IsCnsIntOrI())
1851 // No, it's not a constant.
1852 genCodeForTree(index, RBM_ALLINT);
1853 regSet.rsMarkRegUsed(index);
1856 // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
1857 // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
1860 if (arrLen->OperGet() == GT_ARR_LENGTH)
1862 GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
1863 lenOffset = arrLenExact->ArrLenOffset();
1865 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1866 // We always load the length into a register on ARM and x64.
1868 // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
1869 // lengths, but the index expression *can* be native int (64-bits)
1870 arrRef = arrLenExact->ArrRef();
1871 genCodeForTree(arrRef, RBM_ALLINT);
1872 noway_assert(arrRef->InReg());
1873 regSet.rsMarkRegUsed(arrRef);
1874 noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1877 #if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
1878 // This is another form in which we have an array reference and a constant length. Don't use
1879 // on LOAD_STORE or 64BIT.
1880 else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
1882 genCodeForTree(arrRef, RBM_ALLINT);
1883 noway_assert(arrRef->InReg());
1884 regSet.rsMarkRegUsed(arrRef);
1885 noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
1889 // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
1892 // (Unless it's a constant.)
1893 if (!arrLen->IsCnsIntOrI())
1895 genCodeForTree(arrLen, RBM_ALLINT);
1896 regSet.rsMarkRegUsed(arrLen);
1898 noway_assert(arrLen->InReg());
1899 noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
1903 if (!index->IsCnsIntOrI())
1905 // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
1906 // from its register, get it back in a register.
1907 regMaskTP indRegMask = RBM_ALLINT;
1908 regMaskTP arrRegMask = RBM_ALLINT;
1909 if (!(index->gtFlags & GTF_SPILLED))
1910 arrRegMask = ~genRegMask(index->gtRegNum);
1913 genRecoverReg(arrRef, arrRegMask, RegSet::KEEP_REG);
1914 indRegMask &= ~genRegMask(arrRef->gtRegNum);
1916 else if (!arrLen->IsCnsIntOrI())
1918 genRecoverReg(arrLen, arrRegMask, RegSet::KEEP_REG);
1919 indRegMask &= ~genRegMask(arrLen->gtRegNum);
1921 if (index->gtFlags & GTF_SPILLED)
1922 regSet.rsUnspillReg(index, indRegMask, RegSet::KEEP_REG);
1924 /* Make sure we have the values we expect */
1925 noway_assert(index->InReg());
1926 noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
1928 noway_assert(index->TypeGet() == TYP_I_IMPL ||
1929 (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
1930 var_types indxType = index->TypeGet();
1931 if (indxType != TYP_I_IMPL)
1935 { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
1937 /* Generate "cmp index, [arrRef+LenOffs]" */
1938 inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
1940 else if (arrLen->IsCnsIntOrI())
1942 ssize_t len = arrLen->AsIntConCommon()->IconValue();
1943 inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
1947 inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
1950 /* Generate "jae <fail_label>" */
// Unsigned >= catches both index >= length and negative indices in one branch.
1952 noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
1953 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
1954 genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1958 /* Generate "cmp [rv1+LenOffs], cns" */
1960 bool indIsInt = true;
1961 #ifdef _TARGET_64BIT_
1963 ssize_t ixvFull = index->AsIntConCommon()->IconValue();
1964 if (ixvFull > INT32_MAX)
1973 ssize_t ixvFull = index->AsIntConCommon()->IconValue();
1974 int ixv = (int)ixvFull;
1976 if (arrRef != NULL && indIsInt)
1977 { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
1978 /* Generate "cmp [arrRef+LenOffs], ixv" */
1979 inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
1980 // Generate "jbe <fail_label>"
1981 emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
1982 genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1984 else if (arrLen->IsCnsIntOrI())
1986 ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
1987 // Both are constants; decide at compile time.
1988 if (!(0 <= ixvFull && ixvFull < lenv))
1990 genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1995 genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
1999 /* Generate "cmp arrLen, ixv" */
2000 inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
2001 // Generate "jbe <fail_label>"
2002 emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
2003 genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
2007 // Free the registers that were used.
2008 if (!index->IsCnsIntOrI())
2010 genReleaseReg(index);
2015 genReleaseReg(arrRef);
2017 else if (!arrLen->IsCnsIntOrI())
2019 genReleaseReg(arrLen);
2023 /*****************************************************************************
2025 * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
2026 * Otherwise, check if rvalue is in register. If so, mark it. Then
2027 * call genMakeAddressable(). Needed because genMakeAddressable is used
2028 * for both lvalue and rvalue, and we only can do this for rvalue.
// Like genMakeAddressable2(), but first checks (REDUNDANT_LOAD optimization)
// whether a stack local's value is already live in an acceptable register; if
// so, marks the tree as being in that register instead of re-materializing it.
// Safe only for rvalues, since an lvalue must refer to the actual home location.
2032 regMaskTP CodeGen::genMakeRvalueAddressable(
2033 GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
2039 if (tree->gtOper == GT_LCL_VAR)
2041 reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
2043 if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
2045 noway_assert(!isRegPairType(tree->gtType));
2047 genMarkTreeInReg(tree, reg);
2053 return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
2056 /*****************************************************************************/
// Return true if this GT_LCL_VAR reference is the last use of the (tracked)
// local, as recorded by liveness in the GTF_VAR_DEATH flag.
2058 bool CodeGen::genIsLocalLastUse(GenTree* tree)
2060 const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
2062 noway_assert(tree->OperGet() == GT_LCL_VAR);
2063 noway_assert(varDsc->lvTracked);
2065 return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
2068 /*****************************************************************************
2070 * This is genMakeAddressable(GT_ARR_ELEM).
2071 * Makes the array-element addressable and returns the addressability registers.
2072 * It also marks them as used if keepReg==RegSet::KEEP_REG.
2073 * tree is the dependent tree.
2075 * Note that an array-element needs 2 registers to be addressable, the
2076 * array-object and the offset. This function marks gtArrObj and gtArrInds[0]
2077 * with the 2 registers so that other functions (like instGetAddrMode()) know
2078 * where to look for the offset to use.
// Make a GT_ARR_ELEM (multi-dimensional array element) addressable: evaluate
// the array object and each index, range-check every dimension, and accumulate
// the element offset into 'accReg'. Returns the mask of the two addressability
// registers (array object + accumulated offset); with KEEP_REG they are marked
// used on gtArrObj and gtArrInds[0] so instGetAddrMode() can find them.
2081 regMaskTP CodeGen::genMakeAddrArrElem(GenTree* arrElem, GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg)
2083 noway_assert(arrElem->gtOper == GT_ARR_ELEM);
2084 noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
2086 /* Evaluate all the operands. We don't evaluate them into registers yet
2087 as GT_ARR_ELEM does not reorder the evaluation of the operands, and
2088 hence may use a sub-optimal ordering. We try to improve this
2089 situation somewhat by accessing the operands in stages
2090 (genMakeAddressable2 + genComputeAddressable and
2091 genCompIntoFreeReg + genRecoverReg).
2093 Note: we compute operands into free regs to avoid multiple uses of
2094 the same register. Multi-use would cause problems when we free
2095 registers in FIFO order instead of the assumed LIFO order that
2096 applies to all type of tree nodes except for GT_ARR_ELEM.
2099 GenTree* arrObj = arrElem->gtArrElem.gtArrObj;
2100 unsigned rank = arrElem->gtArrElem.gtArrRank;
2101 var_types elemType = arrElem->gtArrElem.gtArrElemType;
2102 regMaskTP addrReg = RBM_NONE;
2103 regMaskTP regNeed = RBM_ALLINT;
2105 #if !NOGC_WRITE_BARRIERS
2106 // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
2107 // since the arrObj participates in the lea/add instruction
2108 // that computes ARG_0 we should avoid putting it in ARG_1
2110 if (varTypeIsGC(elemType))
2112 regNeed &= ~RBM_ARG_1;
2116 // Strip off any comma expression.
2117 arrObj = genCodeForCommaTree(arrObj);
2119 // Having generated the code for the comma, we don't care about it anymore.
2120 arrElem->gtArrElem.gtArrObj = arrObj;
2122 // If the array ref is a stack var that's dying here we have to move it
2123 // into a register (regalloc already counts of this), as if it's a GC pointer
2124 // it can be collected from here on. This is not an issue for locals that are
2125 // in a register, as they get marked as used an will be tracked.
2126 // The bug that caused this is #100776. (untracked vars?)
2127 if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
2128 !genMarkLclVar(arrObj))
2130 genCodeForTree(arrObj, regNeed);
2131 regSet.rsMarkRegUsed(arrObj, 0);
2132 addrReg = genRegMask(arrObj->gtRegNum);
2136 addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
2137 true, // forLoadStore
2140 true); // evalSideEffs
// Compute every index into its own free register first (see LIFO note above).
2144 for (dim = 0; dim < rank; dim++)
2145 genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
2147 /* Ensure that the array-object is in a register */
2149 addrReg = genKeepAddressable(arrObj, addrReg);
2150 genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
2152 regNumber arrReg = arrObj->gtRegNum;
2153 regMaskTP arrRegMask = genRegMask(arrReg);
2154 regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
2155 regSet.rsLockUsedReg(arrRegMask);
2157 /* Now process all the indices, do the range check, and compute
2158 the offset of the element */
2160 regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
2162 for (dim = 0; dim < rank; dim++)
2164 GenTree* index = arrElem->gtArrElem.gtArrInds[dim];
2166 /* Get the index into a free register (other than the register holding the array) */
2168 genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
2170 #if CPU_LOAD_STORE_ARCH
2171 /* Subtract the lower bound, and do the range check */
// Load/store architectures must load the bound into a scratch register before
// the subtract/compare; other targets can use a memory operand directly.
2173 regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
2174 getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2175 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2176 regTracker.rsTrackRegTrash(valueReg);
2177 getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
2178 regTracker.rsTrackRegTrash(index->gtRegNum);
2180 getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2181 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2182 getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
2184 /* Subtract the lower bound, and do the range check */
2185 getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
2186 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
2187 regTracker.rsTrackRegTrash(index->gtRegNum);
2189 getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
2190 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2192 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
2193 genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
2197 /* Hang on to the register of the first index */
2199 noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
2200 accReg = index->gtRegNum;
2201 noway_assert(accReg != arrReg);
2202 regSet.rsLockUsedReg(genRegMask(accReg));
2206 /* Evaluate accReg = accReg*dim_size + index */
2208 noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
2209 #if CPU_LOAD_STORE_ARCH
2210 getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
2211 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2212 regTracker.rsTrackRegTrash(valueReg);
2213 getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
2215 getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
2216 compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
2219 inst_RV_RV(INS_add, accReg, index->gtRegNum);
2220 regSet.rsMarkRegFree(index->gtRegNum, index);
2221 regTracker.rsTrackRegTrash(accReg);
// If the element size can't be folded into a scaled addressing mode, scale
// the accumulated offset explicitly.
2225 if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
2227 regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
2229 getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
2230 regTracker.rsTrackRegTrash(accReg);
2233 regSet.rsUnlockUsedReg(genRegMask(arrReg));
2234 regSet.rsUnlockUsedReg(genRegMask(accReg));
2236 regSet.rsMarkRegFree(genRegMask(arrReg));
2237 regSet.rsMarkRegFree(genRegMask(accReg));
2239 if (keepReg == RegSet::KEEP_REG)
2241 /* We mark the addressability registers on arrObj and gtArrInds[0].
2242 instGetAddrMode() knows to work with this. */
2244 regSet.rsMarkRegUsed(arrObj, tree);
2245 regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
2248 return genRegMask(arrReg) | genRegMask(accReg);
2251 /*****************************************************************************
2253 * Make sure the given tree is addressable. 'needReg' is a mask that indicates
2254 * the set of registers we would prefer the destination tree to be computed
2255 * into (RBM_NONE means no preference).
2257 * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
2259 * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
2260 * on as used, and return the mask for that register set (if no registers
2261 * are marked as used, RBM_NONE is returned).
2263 * If 'smallOK' is not true and the datatype being address is a byte or short,
2264 * then the tree is forced into a register. This is useful when the machine
2265 * instruction being emitted does not have a byte or short version.
2267 * The "deferOK" parameter indicates the mode of operation - when it's false,
2268 * upon returning an actual address mode must have been formed (i.e. it must
2269 * be possible to immediately call one of the inst_TT methods to operate on
2270 * the value). When "deferOK" is true, we do whatever it takes to be ready
2271 * to form the address mode later - for example, if an index address mode on
2272 * a particular CPU requires the use of a specific register, we usually don't
2273 * want to immediately grab that register for an address mode that will only
2274 * be needed later. The convention is to call genMakeAddressable() with
2275 * "deferOK" equal to true, do whatever work is needed to prepare the other
2276 * operand, call genMakeAddressable() with "deferOK" equal to false, and
2277 * finally call one of the inst_TT methods right after that.
2279 * If we do any other codegen after genMakeAddressable(tree) which can
2280 * potentially spill the addressability registers, genKeepAddressable()
2281 * needs to be called before accessing the tree again.
2283 * genDoneAddressable() needs to be called when we are done with the tree
2284 * to free the addressability registers.
// Make 'tree' usable as an inst_XX_TT operand: leave it where it is if it is
// already addressable (register, local, constant, or a formable indirection),
// otherwise compute it into a register. Returns the mask of registers the
// addressability depends on; with KEEP_REG those registers are marked used.
2287 regMaskTP CodeGen::genMakeAddressable(
2288 GenTree* tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
2290 GenTree* addr = NULL;
2293 /* Is the value simply sitting in a register? */
2297 genUpdateLife(tree);
2302 // TODO: If the value is for example a cast of float -> int, compute
2303 // TODO: the converted value into a stack temp, and leave it there,
2304 // TODO: since stack temps are always addressable. This would require
2305 // TODO: recording the fact that a particular tree is in a stack temp.
2307 /* byte/char/short operand -- is this acceptable to the caller? */
2309 if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
2312 // Evaluate non-last elements of comma expressions, to get to the last.
2313 tree = genCodeForCommaTree(tree);
2315 switch (tree->gtOper)
2319 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
2320 // to worry about it being enregistered.
2321 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
2323 genUpdateLife(tree);
2328 if (!genMarkLclVar(tree))
2330 genUpdateLife(tree);
2334 __fallthrough; // it turns out the variable lives in a register
2338 genUpdateLife(tree);
2347 #ifdef _TARGET_64BIT_
2348 // Non-relocs will be sign extended, so we don't have to enregister
2349 // constants that are equivalent to a sign-extended int.
2350 // Relocs can be left alone if they are RIP-relative.
2351 if ((genTypeSize(tree->TypeGet()) > 4) &&
2352 (!tree->IsIntCnsFitsInI32() ||
2353 (tree->IsIconHandle() &&
2354 (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
2358 #endif // _TARGET_64BIT_
2363 // For MinOpts, we don't do constant folding, so we have
2364 // constants showing up in places we don't like.
2365 // force them into a register now to prevent that.
2366 if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
2373 /* Try to make the address directly addressable */
2375 if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
2376 needReg, keepReg, &regMask, deferOK))
2378 genUpdateLife(tree);
2382 /* No good, we'll have to load the address into a register */
2385 tree = tree->gtOp.gtOp1;
2394 /* Here we need to compute the value 'tree' into a register */
2396 genCodeForTree(tree, needReg);
2400 noway_assert(tree->InReg());
2402 if (isRegPairType(tree->gtType))
2404 /* Are we supposed to hang on to the register? */
2406 if (keepReg == RegSet::KEEP_REG)
2407 regSet.rsMarkRegPairUsed(tree);
2409 regMask = genRegPairMask(tree->gtRegPair);
2413 /* Are we supposed to hang on to the register? */
2415 if (keepReg == RegSet::KEEP_REG)
2416 regSet.rsMarkRegUsed(tree, addr);
2418 regMask = genRegMask(tree->gtRegNum);
2424 /*****************************************************************************
2425 * Compute a tree (which was previously made addressable using
2426 * genMakeAddressable()) into a register.
2427 * needReg - mask of preferred registers.
2428 * keepReg - should the computed register be marked as used by the tree
2429 * freeOnly - target register needs to be a scratch register
// Compute a tree (previously made addressable) into a register. 'addrReg' /
// 'keptReg' describe the addressability registers to release; 'needReg' is a
// register-preference mask; 'freeOnly' forces the result into a scratch
// register. Constants are handled via genGetRegSetToIcon rather than a mov.
2432 void CodeGen::genComputeAddressable(GenTree* tree,
2434 RegSet::KeepReg keptReg,
2436 RegSet::KeepReg keepReg,
2439 noway_assert(genStillAddressable(tree));
2440 noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
// Release the addressability registers before picking the result register.
2442 genDoneAddressable(tree, addrReg, keptReg);
2448 reg = tree->gtRegNum;
2450 if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
2455 if (tree->OperIsConst())
2457 /* Need to handle consts separately as we don't want to emit
2458 "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
2459 handles consts better for SMALL_CODE */
2461 noway_assert(tree->IsCnsIntOrI());
2462 reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
2467 reg = regSet.rsPickReg(needReg);
2469 inst_RV_TT(INS_mov, reg, tree);
2470 regTracker.rsTrackRegTrash(reg);
2474 genMarkTreeInReg(tree, reg);
2476 if (keepReg == RegSet::KEEP_REG)
2477 regSet.rsMarkRegUsed(tree);
2479 gcInfo.gcMarkRegPtrVal(tree);
2482 /*****************************************************************************
2483 * Should be similar to genMakeAddressable() but gives more control.
2486 regMaskTP CodeGen::genMakeAddressable2(GenTree* tree,
2488 RegSet::KeepReg keepReg,
2495 bool evalToReg = false;
// An exception-bearing GT_IND may need to be fully evaluated into a register
// (always the case on load/store architectures) rather than left as an
// addressing mode; otherwise we fall through to plain genMakeAddressable().
2497 if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
2500 #if CPU_LOAD_STORE_ARCH
2507 genCodeForTree(tree, needReg);
2509 noway_assert(tree->InReg());
2511 if (isRegPairType(tree->gtType))
2513 /* Are we supposed to hang on to the register? */
2515 if (keepReg == RegSet::KEEP_REG)
2516 regSet.rsMarkRegPairUsed(tree);
// Return the mask of the register pair now holding the (long) value.
2518 return genRegPairMask(tree->gtRegPair);
2522 /* Are we supposed to hang on to the register? */
2524 if (keepReg == RegSet::KEEP_REG)
2525 regSet.rsMarkRegUsed(tree);
2527 return genRegMask(tree->gtRegNum);
// Default path: defer to genMakeAddressable().
2532 return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
2536 /*****************************************************************************
2538 * The given tree was previously passed to genMakeAddressable(); return
2539 * 'true' if the operand is still addressable.
2543 bool CodeGen::genStillAddressable(GenTree* tree)
2545 /* Has the value (or one or more of its sub-operands) been spilled? */
// If the node itself or any sub-operand carries a spill flag, the address
// mode can no longer be used as-is.
2547 if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
2553 /*****************************************************************************
2555 * Recursive helper to restore complex address modes. The 'lockPhase'
2556 * argument indicates whether we're in the 'lock' or 'reload' phase.
// Returns the mask of registers locked/reloaded for this sub-tree; 'addr' is
// the root address node on whose behalf the registers are marked as used.
2559 regMaskTP CodeGen::genRestoreAddrMode(GenTree* addr, GenTree* tree, bool lockPhase)
2561 regMaskTP regMask = RBM_NONE;
2563 /* Have we found a spilled value? */
2565 if (tree->gtFlags & GTF_SPILLED)
2567 /* Do nothing if we're locking, otherwise reload and lock */
2571 /* Unspill the register */
2573 regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
2575 /* The value should now be sitting in a register */
2577 noway_assert(tree->InReg());
2578 regMask = genRegMask(tree->gtRegNum);
2580 /* Mark the register as used for the address */
2582 regSet.rsMarkRegUsed(tree, addr);
2584 /* Lock the register until we're done with the entire address */
2586 regSet.rsMaskLock |= regMask;
2592 /* Is this sub-tree sitting in a register? */
2596 regMask = genRegMask(tree->gtRegNum);
2598 /* Lock the register if we're in the locking phase */
2601 regSet.rsMaskLock |= regMask;
2605 /* Process any sub-operands of this node */
2607 unsigned kind = tree->OperKind();
2609 if (kind & GTK_SMPOP)
2611 /* Unary/binary operator */
2613 if (tree->gtOp.gtOp1)
2614 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
2615 if (tree->gtGetOp2IfPresent())
2616 regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
2618 else if (tree->gtOper == GT_ARR_ELEM)
2620 /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
2621 which holds the offset-calculation */
2623 regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
2624 regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
2626 else if (tree->gtOper == GT_CMPXCHG)
2628 regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
2632 /* Must be a leaf/constant node */
2634 noway_assert(kind & (GTK_LEAF | GTK_CONST));
2641 /*****************************************************************************
2643 * The given tree was previously passed to genMakeAddressable, but since then
2644 * some of its registers are known to have been spilled; do whatever it takes
2645 * to make the operand addressable again (typically by reloading any spilled
2649 regMaskTP CodeGen::genRestAddressable(GenTree* tree, regMaskTP addrReg, regMaskTP lockMask)
// On entry 'lockMask' must already be locked; it is unlocked before returning.
2651 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2653 /* Is this a 'simple' register spill? */
2655 if (tree->gtFlags & GTF_SPILLED)
2657 /* The mask must match the original register/regpair */
2659 if (isRegPairType(tree->gtType))
2661 noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
2663 regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
// The pair may land in different registers than before; recompute the mask.
2665 addrReg = genRegPairMask(tree->gtRegPair);
2669 noway_assert(addrReg == genRegMask(tree->gtRegNum));
2671 regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
2673 addrReg = genRegMask(tree->gtRegNum);
2676 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2677 regSet.rsMaskLock -= lockMask;
2682 /* We have a complex address mode with some of its sub-operands spilled */
2684 noway_assert((tree->InReg()) == 0);
2685 noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
2688 We'll proceed in several phases:
2690 1. Lock any registers that are part of the address mode and
2691 have not been spilled. This prevents these registers from
2692 getting spilled in step 2.
2694 2. Reload any registers that have been spilled; lock each
2695 one right after it is reloaded.
2697 3. Unlock all the registers.
// First pass (lockPhase == true) locks surviving registers; second pass
// reloads spilled ones. Both report the registers the address now uses.
2700 addrReg = genRestoreAddrMode(tree, tree, true);
2701 addrReg |= genRestoreAddrMode(tree, tree, false);
2703 /* Unlock all registers that the address mode uses */
2705 lockMask |= addrReg;
2707 noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
2708 regSet.rsMaskLock -= lockMask;
2713 /*****************************************************************************
2715 * The given tree was previously passed to genMakeAddressable, but since then
2716 * some of its registers might have been spilled ('addrReg' is the set of
2717 * registers used by the address). This function makes sure the operand is
2718 * still addressable (while avoiding any of the registers in 'avoidMask'),
2719 * and returns the (possibly modified) set of registers that are used by
2720 * the address (these will be marked as used on exit).
2723 regMaskTP CodeGen::genKeepAddressable(GenTree* tree, regMaskTP addrReg, regMaskTP avoidMask)
2725 /* Is the operand still addressable? */
2727 tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
// Fast path: nothing was spilled, so 'addrReg' is still valid as-is.
2729 if (!genStillAddressable(tree))
2733 // Temporarily lock 'avoidMask' while we restore addressability
2734 // genRestAddressable will unlock the 'avoidMask' for us
2735 // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
2736 // In regSet.rsRegMaskFree() we require that all locked registers be marked as used
2738 regSet.rsLockUsedReg(avoidMask);
2741 addrReg = genRestAddressable(tree, addrReg, avoidMask);
2743 noway_assert((regSet.rsMaskLock & avoidMask) == 0);
2749 /*****************************************************************************
2751 * After we're finished with the given operand (which was previously marked
2752 * by calling genMakeAddressable), this function must be called to free any
2753 * registers that may have been used by the address.
2754 * keptReg indicates if the addressability registers were marked as used
2755 * by genMakeAddressable().
2758 void CodeGen::genDoneAddressable(GenTree* tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
2760 if (keptReg == RegSet::FREE_REG)
2762 // We exclude regSet.rsMaskUsed since the registers may be multi-used.
2763 // ie. There may be a pending use in a higher-up tree.
2765 addrReg &= ~regSet.rsMaskUsed;
2767 /* addrReg was not marked as used. So just reset its GC info */
2770 gcInfo.gcMarkRegSetNpt(addrReg);
2775 /* addrReg was marked as used. So we need to free it up (which
2776 will also reset its GC info) */
2778 regSet.rsMarkRegFree(addrReg);
2782 /*****************************************************************************/
2783 /*****************************************************************************
2785 * Make sure the given floating point value is addressable, and return a tree
2786 * that will yield the value as an addressing mode (this tree may differ from
2787 * the one passed in, BTW). If the only way to make the value addressable is
2788 * to evaluate into the FP stack, we do this and return zero.
2791 GenTree* CodeGen::genMakeAddrOrFPstk(GenTree* tree, regMaskTP* regMaskPtr, bool roundResult)
2795 switch (tree->gtOper)
// FP constant: materialize it in the read-only data section and address it
// from there; TYP_FLOAT constants are narrowed from the double payload first.
2803 if (tree->gtType == TYP_FLOAT)
2805 float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
2806 return genMakeConst(&f, TYP_FLOAT, tree, false);
2808 return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
2813 /* Try to make the address directly addressable */
2815 if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
2816 0, RegSet::FREE_REG, regMaskPtr, false))
2818 genUpdateLife(tree);
2827 #if FEATURE_STACK_FP_X87
2828 /* We have no choice but to compute the value 'tree' onto the FP stack */
2830 genCodeForTreeFlt(tree);
2835 /*****************************************************************************/
2836 /*****************************************************************************
2838 * Display a string literal value (debug only).
2844 /*****************************************************************************
2846 * Generate code to check that the GS cookie wasn't thrashed by a buffer
2847 * overrun. If pushReg is true, preserve all registers around code sequence.
2848 * Otherwise, ECX may be modified.
2850 void CodeGen::genEmitGSCookieCheck(bool pushReg)
2852 // Make sure that EAX didn't die in the return expression
2853 if (!pushReg && (compiler->info.compRetType == TYP_REF))
2854 gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
2856 // Add cookie check code for unsafe buffers
2857 BasicBlock* gsCheckBlk;
2858 regMaskTP byrefPushedRegs = RBM_NONE;
2859 regMaskTP norefPushedRegs = RBM_NONE;
2860 regMaskTP pushedRegs = RBM_NONE;
2862 noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
2864 #if CPU_LOAD_STORE_ARCH
2865 // Lock all ABI argument registers before generating the check. All other registers should be dead, so this
2866 // shouldn't over-constrain us.
2867 const regMaskTP unlockedArgRegs = RBM_ARG_REGS & ~regSet.rsMaskLock;
2868 regMaskTP usedArgRegs;
2869 regSet.rsLockReg(unlockedArgRegs, &usedArgRegs);
// Case 1: the cookie is a known immediate value (no global address) —
// compare the stack slot against the immediate directly.
2872 if (compiler->gsGlobalSecurityCookieAddr == NULL)
2875 CLANG_FORMAT_COMMENT_ANCHOR;
2877 #if CPU_LOAD_STORE_ARCH
2878 regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
2879 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
2880 regTracker.rsTrackRegTrash(reg);
// Use an immediate compare only if the value (or its complement) encodes
// as an ARM ALU immediate; otherwise materialize it in a register first.
2882 if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
2883 arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
2885 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
2889 // Load CookieVal into a register
2890 regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
2891 instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
2892 getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
2895 getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
2896 (int)compiler->gsGlobalSecurityCookieVal);
// Case 2: the cookie lives at a global address — load it into a scratch
// register and compare against the copy in the stack frame.
2901 regNumber regGSCheck;
2902 regMaskTP regMaskGSCheck;
2903 #if CPU_LOAD_STORE_ARCH
2904 regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
2905 regMaskGSCheck = genRegMask(regGSCheck);
2907 // Don't pick the 'this' register
2908 if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
2909 (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
2911 regGSCheck = REG_EDX;
2912 regMaskGSCheck = RBM_EDX;
2916 regGSCheck = REG_ECX;
2917 regMaskGSCheck = RBM_ECX;
2921 if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
2923 pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
2927 noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
2930 #if defined(_TARGET_ARM_)
2931 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
2932 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
2934 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
2935 (ssize_t)compiler->gsGlobalSecurityCookieAddr);
2936 #endif // !_TARGET_ARM_
2937 regTracker.rsTrashRegSet(regMaskGSCheck);
2939 regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
2940 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
2941 regTracker.rsTrackRegTrash(regTmp);
2942 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
2944 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
// Equal cookies -> jump over the FAIL_FAST helper call; otherwise fail fast.
2948 gsCheckBlk = genCreateTempLabel();
2949 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
2950 inst_JMP(jmpEqual, gsCheckBlk);
2951 genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
2952 genDefineTempLabel(gsCheckBlk);
2954 genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
2956 #if CPU_LOAD_STORE_ARCH
2957 // Unlock all ABI argument registers.
2958 regSet.rsUnlockReg(unlockedArgRegs, usedArgRegs);
2962 /*****************************************************************************
2964 * Generate any side effects within the given expression tree.
2967 void CodeGen::genEvalSideEffects(GenTree* tree)
2974 /* Does this sub-tree contain any side-effects? */
2975 if (tree->gtFlags & GTF_SIDE_EFFECT)
2977 #if FEATURE_STACK_FP_X87
2978 /* Remember the current FP stack level */
2979 int iTemps = genNumberTemps();
2981 if (tree->OperIsIndir())
2983 regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
2987 gcInfo.gcMarkRegPtrVal(tree);
2988 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
2990 // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
2991 // do not need an additional null-check
2992 /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
// NOTE(review): '((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE)' is always
// non-zero because GTF_IND_VOLATILE is OR-ed in as a constant; the comment
// above suggests '(tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE))' was
// intended — confirm before changing (the extra null-check is merely
// conservative, not incorrect).
2993 else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & GTF_EXCEPT) | GTF_IND_VOLATILE))
2995 /* Compare against any register to do null-check */
2996 CLANG_FORMAT_COMMENT_ANCHOR;
2998 #if defined(_TARGET_XARCH_)
2999 inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
3000 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3001 #elif CPU_LOAD_STORE_ARCH
// Load/store architectures cannot compare against memory; force an actual
// load of the value to trigger any fault.
3002 if (varTypeIsFloating(tree->TypeGet()))
3004 genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
3009 genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
3012 if (tree->gtFlags & GTF_IND_VOLATILE)
3014 // Emit a memory barrier instruction after the load
3015 instGen_MemoryBarrier();
3024 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
3029 /* Generate the expression and throw it away */
3030 genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
3033 gcInfo.gcMarkRegPtrVal(tree);
3036 #if FEATURE_STACK_FP_X87
3037 /* If the tree computed a value on the FP stack, pop the stack */
3038 if (genNumberTemps() > iTemps)
3040 noway_assert(genNumberTemps() == iTemps + 1);
3041 genDiscardStackFP(tree);
3047 noway_assert(tree->gtOper != GT_ASG);
3049 /* Walk the tree, just to mark any dead values appropriately */
3051 oper = tree->OperGet();
3052 kind = tree->OperKind();
3054 /* Is this a constant or leaf node? */
3056 if (kind & (GTK_CONST | GTK_LEAF))
3058 #if FEATURE_STACK_FP_X87
3059 if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
3061 genRegVarDeathStackFP(tree);
3062 FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
3065 genUpdateLife(tree);
3066 gcInfo.gcMarkRegPtrVal(tree);
3070 /* Must be a 'simple' unary/binary operator */
3072 noway_assert(kind & GTK_SMPOP);
// Side-effect-free operator: recurse into op1 and iterate on op2 (or op1)
// purely to update liveness for any values that die here.
3074 if (tree->gtGetOp2IfPresent())
3076 genEvalSideEffects(tree->gtOp.gtOp1);
3078 tree = tree->gtOp.gtOp2;
3083 tree = tree->gtOp.gtOp1;
3089 /*****************************************************************************
3091 * A persistent pointer value is being overwritten, record it for the GC.
3093 * tgt : the destination being written to
3094 * assignVal : the value being assigned (the source). It must currently be in a register.
3095 * tgtAddrReg : the set of registers being used by "tgt"
3097 * Returns : the mask of the scratch register that was used.
3098 * RBM_NONE if a write-barrier is not needed.
3101 regMaskTP CodeGen::WriteBarrier(GenTree* tgt, GenTree* assignVal, regMaskTP tgtAddrReg)
3103 noway_assert(assignVal->InReg());
3105 GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
3106 if (wbf == GCInfo::WBF_NoBarrier)
3109 regMaskTP resultRegMask = RBM_NONE;
3111 regNumber reg = assignVal->gtRegNum;
3113 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3115 if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
// x86 fast path: per-source-register assembly helpers that take the value in
// a fixed register. Indexed [checked?][source register]; -1 marks registers
// (ESP, and the write-barrier scratch register) with no helper.
3118 const static int regToHelper[2][8] = {
3119 // If the target is known to be in managed memory
3121 CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
3122 CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
3125 // Don't know if the target is in managed memory
3127 CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
3128 CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
3129 CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
3133 noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
3134 noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
3135 noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
3136 noway_assert(regToHelper[0][REG_ESP] == -1);
3137 noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
3138 noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
3139 noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
3141 noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
3142 noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
3143 noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
3144 noway_assert(regToHelper[1][REG_ESP] == -1);
3145 noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
3146 noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
3147 noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
3149 noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
3152 Generate the following code:
3155 call write_barrier_helper_reg
3157 First grab the RBM_WRITE_BARRIER register for the target address.
3163 if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
3165 rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
3167 regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
3168 regSet.rsMaskLock |= RBM_WRITE_BARRIER;
3174 rg1 = REG_WRITE_BARRIER;
3179 noway_assert(rg1 == REG_WRITE_BARRIER);
3181 /* Generate "lea EDX, [addr-mode]" */
// The destination is retyped to TYP_BYREF: from here on it is an interior
// pointer, and must be reported to the GC as such.
3183 noway_assert(tgt->gtType == TYP_REF);
3184 tgt->gtType = TYP_BYREF;
3185 inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
3187 /* Free up anything that was tied up by the LHS */
3188 genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3190 // In case "tgt" was a comma:
3191 tgt = tgt->gtEffectiveVal();
3193 regTracker.rsTrackRegTrash(rg1);
3194 gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
3195 gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
3197 /* Call the proper vm helper */
3199 // enforced by gcIsWriteBarrierCandidate
3200 noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);
3202 unsigned tgtAnywhere = 0;
3203 if ((tgt->gtOper == GT_IND) &&
3204 ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
3209 int helper = regToHelper[tgtAnywhere][reg];
3210 resultRegMask = genRegMask(reg);
3212 gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
3214 genEmitHelperCall(helper,
3216 EA_PTRSIZE); // retSize
3220 regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
3221 regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
3224 return resultRegMask;
3230 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3232 #if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
// Generic path: the helper takes the destination address in ARG_0 and the
// value in ARG_1, so both argument registers must be set up (spilling any
// current occupants) before the call.
3235 Generate the following code (or its equivalent on the given target):
3239 call write_barrier_helper
3241 First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
3244 if (reg != REG_ARG_1)
3246 // We may need to spill whatever is in the ARG_1 register
3248 if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
3250 regSet.rsSpillReg(REG_ARG_1);
3253 inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
3255 resultRegMask = RBM_ARG_1;
3257 regTracker.rsTrackRegTrash(REG_ARG_1);
3258 gcInfo.gcMarkRegSetNpt(REG_ARG_1);
3259 gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
3261 bool free_arg1 = false;
3262 if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
3264 regSet.rsMaskUsed |= RBM_ARG_1;
3268 // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
3270 /* Generate "lea R0, [addr-mode]" */
3272 noway_assert(tgt->gtType == TYP_REF);
3273 tgt->gtType = TYP_BYREF;
3275 tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
3277 // We may need to spill whatever is in the ARG_0 register
3279 if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0
3280 ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
3281 (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
3283 regSet.rsSpillReg(REG_ARG_0);
3286 inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
3288 /* Free up anything that was tied up by the LHS */
3289 genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
3291 regTracker.rsTrackRegTrash(REG_ARG_0);
3292 gcInfo.gcMarkRegSetNpt(REG_ARG_0);
3293 gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
3296 #if NOGC_WRITE_BARRIERS
3297 // Finally, we may be required to spill whatever is in the further argument registers
3298 // trashed by the call. The write barrier trashes some further registers --
3299 // either the standard volatile var set, or, if we're using assembly barriers, a more specialized set.
3301 regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
3303 regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
3305 // Spill any other registers trashed by the write barrier call and currently in use.
3306 regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
3308 regSet.rsSpillRegs(mustSpill);
3309 #endif // _TARGET_ARM_
3311 bool free_arg0 = false;
3312 if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
3314 regSet.rsMaskUsed |= RBM_ARG_0;
3318 // genEmitHelperCall might need to grab a register
3319 // so don't let it spill one of the arguments
3321 regMaskTP reallyUsedRegs = RBM_NONE;
3322 regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
3324 genGCWriteBarrier(tgt, wbf);
3326 regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
3327 gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
3331 regSet.rsMaskUsed &= ~RBM_ARG_0;
3335 regSet.rsMaskUsed &= ~RBM_ARG_1;
3338 return resultRegMask;
3340 #endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
3344 /*****************************************************************************
3346 * Generate the appropriate conditional jump(s) right after the high 32 bits
3347 * of two long values have been compared.
// (Header previously said "low 32 bits"; genCondJumpLng calls this right
// after the high-part compare, and genJccLongLo after the low-part compare.)
3350 void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
3354 jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3360 inst_JMP(EJ_jne, jumpFalse);
3364 inst_JMP(EJ_jne, jumpTrue);
// For the ordered compares each case emits a taken/not-taken pair; the
// unsigned variants (ja/jb) vs signed (jg/jl) are chosen per 'isUnsigned'.
3371 inst_JMP(EJ_ja, jumpFalse);
3372 inst_JMP(EJ_jb, jumpTrue);
3376 inst_JMP(EJ_jg, jumpFalse);
3377 inst_JMP(EJ_jl, jumpTrue);
3385 inst_JMP(EJ_jb, jumpFalse);
3386 inst_JMP(EJ_ja, jumpTrue);
3390 inst_JMP(EJ_jl, jumpFalse);
3391 inst_JMP(EJ_jg, jumpTrue);
3396 noway_assert(!"expected a comparison operator");
3400 /*****************************************************************************
3402 * Generate the appropriate conditional jump(s) right after the low 32 bits
3403 * of two long values have been compared.
// (Header previously said "high 32 bits"; the low word of a long compare is
// handled here, always with unsigned condition codes — see jump kinds below.)
3406 void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
3411 inst_JMP(EJ_je, jumpTrue);
3415 inst_JMP(EJ_jne, jumpTrue);
3419 inst_JMP(EJ_jb, jumpTrue);
3423 inst_JMP(EJ_jbe, jumpTrue);
3427 inst_JMP(EJ_jae, jumpTrue);
3431 inst_JMP(EJ_ja, jumpTrue);
3435 noway_assert(!"expected comparison");
3438 #elif defined(_TARGET_ARM_)
3439 /*****************************************************************************
3441 * Generate the appropriate conditional jump(s) right after the high 32 bits
3442 * of two long values have been compared.
// (Header previously said "low 32 bits"; see genCondJumpLng, which calls this
// after the high-part compare. ARM analogue of the x86 version above.)
3445 void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
3449 jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3455 inst_JMP(EJ_ne, jumpFalse);
3459 inst_JMP(EJ_ne, jumpTrue);
// Ordered compares: unsigned (hi/lo) vs signed (gt/lt) pairs per 'isUnsigned'.
3466 inst_JMP(EJ_hi, jumpFalse);
3467 inst_JMP(EJ_lo, jumpTrue);
3471 inst_JMP(EJ_gt, jumpFalse);
3472 inst_JMP(EJ_lt, jumpTrue);
3480 inst_JMP(EJ_lo, jumpFalse);
3481 inst_JMP(EJ_hi, jumpTrue);
3485 inst_JMP(EJ_lt, jumpFalse);
3486 inst_JMP(EJ_gt, jumpTrue);
3491 noway_assert(!"expected a comparison operator");
3495 /*****************************************************************************
3497 * Generate the appropriate conditional jump(s) right after the low 32 bits
3498 * of two long values have been compared.
// (Header previously said "high 32 bits"; the low word is compared here,
// always using unsigned ARM condition codes — lo/ls/hs/hi below.)
3501 void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
3506 inst_JMP(EJ_eq, jumpTrue);
3510 inst_JMP(EJ_ne, jumpTrue);
3514 inst_JMP(EJ_lo, jumpTrue);
3518 inst_JMP(EJ_ls, jumpTrue);
3522 inst_JMP(EJ_hs, jumpTrue);
3526 inst_JMP(EJ_hi, jumpTrue);
3530 noway_assert(!"expected comparison");
3534 /*****************************************************************************
3536 * Called by genCondJump() for TYP_LONG.
3539 void CodeGen::genCondJumpLng(GenTree* cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
3541 noway_assert(jumpTrue && jumpFalse);
3542 noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
3543 noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
3545 GenTree* op1 = cond->gtOp.gtOp1;
3546 GenTree* op2 = cond->gtOp.gtOp2;
3547 genTreeOps cmp = cond->OperGet();
3551 /* Are we comparing against a constant? */
3553 if (op2->gtOper == GT_CNS_LNG)
3555 __int64 lval = op2->gtLngCon.gtLconVal;
3558 // We're "done" evaluating op2; let's strip any commas off op1 before we
3560 op1 = genCodeForCommaTree(op1);
3562 /* We can generate better code for some special cases */
3563 instruction ins = INS_invalid;
3564 bool useIncToSetFlags = false;
3565 bool specialCaseCmp = false;
// Special cases: EQ/NE against 0 (OR the halves together) and against -1
// (AND the halves, then INC) avoid the two-compare/two-jump sequence.
3573 useIncToSetFlags = false;
3574 specialCaseCmp = true;
3576 else if (lval == -1)
3580 useIncToSetFlags = true;
3581 specialCaseCmp = true;
3584 else if (cmp == GT_NE)
3590 useIncToSetFlags = false;
3591 specialCaseCmp = true;
3593 else if (lval == -1)
3597 useIncToSetFlags = true;
3598 specialCaseCmp = true;
3604 /* Make the comparand addressable */
3606 addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
3608 regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
3609 insFlags flags = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
// If op1 is already in a register pair, try to reuse a grabbable half of
// the pair as the scratch register for combining the two words.
3613 regPairNo regPair = op1->gtRegPair;
3614 regNumber rLo = genRegPairLo(regPair);
3615 regNumber rHi = genRegPairHi(regPair);
3616 if (tmpMask & genRegMask(rLo))
3620 else if (tmpMask & genRegMask(rHi))
3627 rTmp = regSet.rsGrabReg(tmpMask);
3628 inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
3631 /* The register is now trashed */
3632 regTracker.rsTrackRegTrash(rTmp);
3636 /* Set the flags using INS_AND | INS_OR */
3637 inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
3641 /* Set the flags using INS_AND | INS_OR */
3642 inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3645 else // op1 is not in a register.
3647 rTmp = regSet.rsGrabReg(tmpMask);
3649 /* Load the low 32-bits of op1 */
3650 inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
3652 /* The register is now trashed */
3653 regTracker.rsTrackRegTrash(rTmp);
3655 /* Set the flags using INS_AND | INS_OR */
3656 inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
3659 /* Free up the addrReg(s) if any */
3660 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
3662 /* compares against -1, also requires an inc instruction */
3663 if (useIncToSetFlags)
3665 /* Make sure the inc will set the flags */
3666 assert(cond->gtSetFlags());
3667 genIncRegBy(rTmp, 1, cond, TYP_INT);
3670 #if FEATURE_STACK_FP_X87
3671 // We may need a transition block
3674 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3677 emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
3678 inst_JMP(jmpKind, jumpTrue);
3680 else // specialCaseCmp == false
3682 /* Make the comparand addressable */
3683 addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
3685 /* Compare the high part first */
3687 int ival = (int)(lval >> 32);
3689 /* Comparing a register against 0 is easier */
3691 if (!ival && (op1->InReg()) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
3693 /* Generate 'test rTmp, rTmp' */
3694 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3698 if (!(op1->InReg()) && (op1->gtOper == GT_CNS_LNG))
3700 /* Special case: comparison of two constants */
3701 // Needed as gtFoldExpr() doesn't fold longs
3703 noway_assert(addrReg == 0);
3704 int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
3706 /* Get the constant operand into a register */
3707 rTmp = genGetRegSetToIcon(op1_hiword);
3709 /* Generate 'cmp rTmp, ival' */
3711 inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3715 /* Generate 'cmp op1, ival' */
3717 inst_TT_IV(INS_cmp, op1, ival, 4);
3721 #if FEATURE_STACK_FP_X87
3722 // We may need a transition block
3725 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3728 /* Generate the appropriate jumps */
3730 if (cond->gtFlags & GTF_UNSIGNED)
3731 genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3733 genJccLongHi(cmp, jumpTrue, jumpFalse);
3735 /* Compare the low part second */
3739 /* Comparing a register against 0 is easier */
3741 if (!ival && (op1->InReg()) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
3743 /* Generate 'test rTmp, rTmp' */
3744 instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
3748 if (!(op1->InReg()) && (op1->gtOper == GT_CNS_LNG))
3750 /* Special case: comparison of two constants */
3751 // Needed as gtFoldExpr() doesn't fold longs
3753 noway_assert(addrReg == 0);
3754 int op1_loword = (int)op1->gtLngCon.gtLconVal;
3756 /* get the constant operand into a register */
3757 rTmp = genGetRegSetToIcon(op1_loword);
3759 /* Generate 'cmp rTmp, ival' */
3761 inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
3765 /* Generate 'cmp op1, ival' */
3767 inst_TT_IV(INS_cmp, op1, ival, 0);
3771 /* Generate the appropriate jumps */
3772 genJccLongLo(cmp, jumpTrue, jumpFalse);
3774 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
3777 else // (op2->gtOper != GT_CNS_LNG)
3780 /* The operands would be reversed by physically swapping them */
3782 noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
3784 /* Generate the first operand into a register pair */
3786 genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
3787 noway_assert(op1->InReg());
3789 #if CPU_LOAD_STORE_ARCH
3790 /* Generate the second operand into a register pair */
3791 // Fix 388442 ARM JitStress WP7
3792 genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3793 noway_assert(op2->InReg());
3794 regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
3796 /* Make the second operand addressable */
3798 addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
3800 /* Make sure the first operand hasn't been spilled */
3802 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
3803 noway_assert(op1->InReg());
3805 regPairNo regPair = op1->gtRegPair;
3807 #if !CPU_LOAD_STORE_ARCH
3808 /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
3810 addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
3813 #if FEATURE_STACK_FP_X87
3814 // We may need a transition block
3817 jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
3821 /* Perform the comparison - high parts */
3823 inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
3825 if (cond->gtFlags & GTF_UNSIGNED)
3826 genJccLongHi(cmp, jumpTrue, jumpFalse, true);
3828 genJccLongHi(cmp, jumpTrue, jumpFalse);
3830 /* Compare the low parts */
3832 inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
3833 genJccLongLo(cmp, jumpTrue, jumpFalse);
3835 /* Free up anything that was tied up by either operand */
3836 CLANG_FORMAT_COMMENT_ANCHOR;
3838 #if CPU_LOAD_STORE_ARCH
3840 // Fix 388442 ARM JitStress WP7
3841 regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
3842 genReleaseRegPair(op2);
3844 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
3846 genReleaseRegPair(op1);
3850 /*****************************************************************************
3851 * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
3852 * Called by genCondJumpFlt() to generate the fcomp instruction appropriate
3853 * to the architecture we're running on.
3856 * gen_fcomp_FN: fcomp ST(0), stk
3857 * gen_fcomp_FS_TT: fcomp ST(0), addr
3858 * gen_fcompp_FS: fcompp
3859 * These are followed by fnstsw, sahf to get the flags in EFLAGS.
3862 * gen_fcomp_FN: fcomip ST(0), stk
3863 * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0)
3864 * (and reverse the branch condition since addr comes first)
3865 * gen_fcompp_FS: fcomip, fstp
3866 * These instructions will correctly set the EFLAGS register.
3868 * Return value: These functions return true if the instruction has
3869 * already placed its result in the EFLAGS register.
// Returns true when the float-compare sequences (see the comment block
// above) should use the fcomip family, which writes EFLAGS directly,
// instead of fcomp followed by fnstsw/sahf.  Simply reports the JIT
// option computed at startup; no state is modified.
3872 bool CodeGen::genUse_fcomip()
3874 return compiler->opts.compUseFCOMI;
3877 /*****************************************************************************
3879 * Sets the flag for the TYP_INT/TYP_REF comparison.
3880 * We try to use the flags if they have already been set by a prior
3882 * eg. i++; if(i<0) {} Here, the "i++;" will have set the sign flag. We don't
3883 * need to compare again with zero. Just use a "INS_js"
3885 * Returns the flags the following jump/set instruction should use.
// Emit a compare (or reuse EFLAGS already set by a prior instruction) for a
// TYP_INT/TYP_REF relational node 'cond', and return the emitJumpKind that the
// following jump/set instruction should use.  Three main paths below:
//   1) op2 is an integer constant: zero tests (including 'test' for GT_AND),
//      then general compare-against-immediate;
//   2) reuse of flags already tracked for a local variable or register;
//   3) general register/memory compare of op1 against op2.
// NOTE(review): this excerpt has physical lines elided (e.g. the
// NO_TEST_FOR_AND label targeted by the gotos is not visible) — confirm
// against the full source before relying on control-flow details.
3888 emitJumpKind CodeGen::genCondSetFlags(GenTree* cond)
3890 noway_assert(cond->OperIsCompare());
3891 noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
3893 GenTree* op1 = cond->gtOp.gtOp1;
3894 GenTree* op2 = cond->gtOp.gtOp2;
3895 genTreeOps cmp = cond->OperGet();
// Canonicalize: if the tree requested reversed evaluation, physically swap
// the operands and the relop so the rest of this function can ignore
// GTF_REVERSE_OPS entirely.
3897 if (cond->gtFlags & GTF_REVERSE_OPS)
3899 /* Don't forget to modify the condition as well */
3901 cond->gtOp.gtOp1 = op2;
3902 cond->gtOp.gtOp2 = op1;
3903 cond->SetOper(GenTree::SwapRelop(cmp));
3904 cond->gtFlags &= ~GTF_REVERSE_OPS;
3906 /* Get hold of the new values */
3908 cmp = cond->OperGet();
3909 op1 = cond->gtOp.gtOp1;
3910 op2 = cond->gtOp.gtOp2;
3913 // Note that op1's type may get bashed. So save it early
3915 var_types op1Type = op1->TypeGet();
3916 bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
3917 emitAttr size = EA_UNKNOWN;
// addrReg1/addrReg2 track the register masks held "in use" for each operand;
// they are released together at DONE_FLAGS.
3920 regMaskTP addrReg1 = RBM_NONE;
3921 regMaskTP addrReg2 = RBM_NONE;
3922 emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
3927 regMaskTP newLiveMask;
3930 /* Are we comparing against a constant? */
// ---- Path 1: op2 is an integer (or handle) constant ----
3932 if (op2->IsCnsIntOrI())
3934 ssize_t ival = op2->gtIntConCommon.IconValue();
3936 /* unsigned less than comparisons with 1 ('< 1' )
3937 should be transformed into '== 0' to potentially
3938 suppress a tst instruction.
3940 if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
3942 op2->gtIntCon.gtIconVal = ival = 0;
3943 cond->gtOper = cmp = GT_EQ;
3946 /* Comparisons against 0 can be easier */
3950 // if we can safely change the comparison to unsigned we do so
3951 if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
3956 /* unsigned comparisons with 0 should be transformed into
3957 '==0' or '!= 0' to potentially suppress a tst instruction. */
3962 cond->gtOper = cmp = GT_NE;
3963 else if (cmp == GT_LE)
3964 cond->gtOper = cmp = GT_EQ;
3967 /* Is this a simple zero/non-zero test? */
// For EQ/NE against 0, a 'test' instruction suffices; when op1 is an AND
// with a constant we can fold the AND into the test itself.
3969 if (cmp == GT_EQ || cmp == GT_NE)
3971 /* Is the operand an "AND" operation? */
3973 if (op1->gtOper == GT_AND)
3975 GenTree* an1 = op1->gtOp.gtOp1;
3976 GenTree* an2 = op1->gtOp.gtOp2;
3978 /* Check for the case "expr & icon" */
3980 if (an2->IsIntCnsFitsInI32())
3982 int iVal = (int)an2->gtIntCon.gtIconVal;
3984 /* make sure that constant is not out of an1's range */
// If the mask has bits outside an1's (small) type, the folded 'test'
// would check bits that aren't loaded — bail to the generic path.
3986 switch (an1->gtType)
3990 if (iVal & 0xffffff00)
3991 goto NO_TEST_FOR_AND;
3995 if (iVal & 0xffff0000)
3996 goto NO_TEST_FOR_AND;
4002 if (an1->IsCnsIntOrI())
4004 // Special case - Both operands of AND are consts
4005 genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
4006 addrReg1 = genRegMask(an1->gtRegNum);
4010 addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
4012 #if CPU_LOAD_STORE_ARCH
// Load/store architectures (ARM) cannot 'test' memory directly: force the
// value into a register, and materialize the immediate too if it does not
// encode as an ALU operand.
4013 if ((an1->InReg()) == 0)
4015 genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
4016 if (arm_Valid_Imm_For_Alu(iVal))
4018 inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
4022 regNumber regTmp = regSet.rsPickFreeReg();
4023 instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
4024 inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
4027 addrReg1 = RBM_NONE;
4032 #ifdef _TARGET_XARCH_
4033 // Check to see if we can use a smaller immediate.
4034 if ((an1->InReg()) && ((iVal & 0x0000FFFF) == iVal))
4036 var_types testType =
4037 (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
4038 #if CPU_HAS_BYTE_REGS
4039 // if we don't have byte-able register, switch to the 2-byte form
4040 if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
4042 testType = TYP_USHORT;
4044 #endif // CPU_HAS_BYTE_REGS
4046 inst_TT_IV(INS_TEST, an1, iVal, testType);
4049 #endif // _TARGET_XARCH_
4051 inst_TT_IV(INS_TEST, an1, iVal);
4060 // TODO: Check for other cases that can generate 'test',
4061 // TODO: also check for a 64-bit integer zero test which
4062 // TODO: could generate 'or lo, hi' followed by jz/jnz.
4066 // See what Jcc instruction we would use if we can take advantage of
4067 // the knowledge of EFLAGs.
4072 Unsigned comparison to 0. Using this table:
4074 ----------------------------------------------------
4075 | Comparison | Flags Checked | Instruction Used |
4076 ----------------------------------------------------
4077 | == 0 | ZF = 1 | je |
4078 ----------------------------------------------------
4079 | != 0 | ZF = 0 | jne |
4080 ----------------------------------------------------
4081 | < 0 | always FALSE | N/A |
4082 ----------------------------------------------------
4083 | <= 0 | ZF = 1 | je |
4084 ----------------------------------------------------
4085 | >= 0 | always TRUE | N/A |
4086 ----------------------------------------------------
4087 | > 0 | ZF = 0 | jne |
4088 ----------------------------------------------------
4111 #elif defined(_TARGET_X86_)
4132 noway_assert(!"Unexpected comparison OpCode");
4139 Signed comparison to 0. Using this table:
4141 -----------------------------------------------------
4142 | Comparison | Flags Checked | Instruction Used |
4143 -----------------------------------------------------
4144 | == 0 | ZF = 1 | je |
4145 -----------------------------------------------------
4146 | != 0 | ZF = 0 | jne |
4147 -----------------------------------------------------
4148 | < 0 | SF = 1 | js |
4149 -----------------------------------------------------
4150 | <= 0 | N/A | N/A |
4151 -----------------------------------------------------
4152 | >= 0 | SF = 0 | jns |
4153 -----------------------------------------------------
4155 -----------------------------------------------------
4179 #elif defined(_TARGET_X86_)
4200 noway_assert(!"Unexpected comparison OpCode");
4203 assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
4205 assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
4207 /* Is the value a simple local variable? */
// If the flags tracker says EFLAGS already reflect this local (set by a
// prior instruction), we can skip emitting any compare at all.
4209 if (op1->gtOper == GT_LCL_VAR)
4211 /* Is the flags register set to the value? */
4213 if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
4215 if (jumpKind != EJ_NONE)
4217 addrReg1 = RBM_NONE;
4224 /* Make the comparand addressable */
4225 addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4227 /* Are the condition flags set based on the value? */
4229 unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
4233 if (genFlagsAreReg(op1->gtRegNum))
4235 flags |= GTF_ZSF_SET;
4241 if (jumpKind != EJ_NONE)
4247 /* Is the value in a register? */
4251 regNumber reg = op1->gtRegNum;
4253 /* With a 'test' we can do any signed test or any test for equality */
4255 if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
4257 emitAttr compareSize = emitTypeSize(op1->TypeGet());
4259 // If we have an GT_REG_VAR then the register will be properly sign/zero extended
4260 // But only up to 4 bytes
4261 if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
4263 compareSize = EA_4BYTE;
4266 #if CPU_HAS_BYTE_REGS
4267 // Make sure if we require a byte compare that we have a byte-able register
4268 if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
4269 #endif // CPU_HAS_BYTE_REGS
4271 /* Generate 'test reg, reg' */
4272 instGen_Compare_Reg_To_Zero(compareSize, reg);
// ---- Constant compare against a non-zero immediate ----
4279 else // if (ival != 0)
4281 bool smallOk = true;
4283 /* make sure that constant is not out of op1's range
4284 if it is, we need to perform an int with int comparison
4285 and therefore, we set smallOk to false, so op1 gets loaded
4289 /* If op1 is TYP_SHORT, and is followed by an unsigned
4290 * comparison, we can use smallOk. But we don't know which
4291 * flags will be needed. This probably doesn't happen often.
4293 var_types gtType = op1->TypeGet();
// Each check below asks: does 'ival' survive truncation to op1's type?
// If not, a small-operand compare would be wrong (smallOk cleared in the
// elided branch bodies).
4298 if (ival != (signed char)ival)
4303 if (ival != (unsigned char)ival)
4308 if (ival != (signed short)ival)
4312 if (ival != (unsigned short)ival)
4316 #ifdef _TARGET_64BIT_
4318 if (!FitsIn<INT32>(ival))
4322 if (!FitsIn<UINT32>(ival))
4325 #endif // _TARGET_64BIT_
4331 if (smallOk && // constant is in op1's range
4332 !unsignedCmp && // signed comparison
4333 varTypeIsSmall(gtType) && // smalltype var
4334 varTypeIsUnsigned(gtType)) // unsigned type
4339 /* Make the comparand addressable */
4340 addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
4343 /* Special case: comparison of two constants */
4345 // Needed if Importer doesn't call gtFoldExpr()
4347 if (!(op1->InReg()) && (op1->IsCnsIntOrI()))
4349 // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
4351 /* Workaround: get the constant operand into a register */
4352 genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4354 noway_assert(addrReg1 == RBM_NONE);
4355 noway_assert(op1->InReg());
4357 addrReg1 = genRegMask(op1->gtRegNum);
4360 /* Compare the operand against the constant */
// Handle constants need a relocation entry so the runtime can patch them.
4362 if (op2->IsIconHandle())
4364 inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
4368 inst_TT_IV(INS_cmp, op1, ival);
4373 //---------------------------------------------------------------------
// ---- Path 3: general compare, op2 is not a constant ----
4375 // We reach here if op2 was not a GT_CNS_INT
// byteCmp/shortCmp (declared in an elided line) narrow the emitted cmp when
// both operand types agree on a small size.
4381 if (op1Type == op2->gtType)
4383 shortCmp = varTypeIsShort(op1Type);
4384 byteCmp = varTypeIsByte(op1Type);
4387 noway_assert(op1->gtOper != GT_CNS_INT);
4389 if (op2->gtOper == GT_LCL_VAR)
4392 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4393 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4395 /* Are we comparing against a register? */
4399 /* Make the comparands addressable and mark as used */
4401 assert(addrReg1 == RBM_NONE);
4402 addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
4404 /* Is the size of the comparison byte/char/short ? */
4406 if (varTypeIsSmall(op1->TypeGet()))
4408 /* Is op2 sitting in an appropriate register? */
4410 if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
4413 /* Is op2 of the right type for a small comparison */
4415 if (op2->gtOper == GT_REG_VAR)
4417 if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
4422 if (op1->gtType != op2->gtType)
4426 if (varTypeIsUnsigned(op1->TypeGet()))
4430 assert(addrReg2 == RBM_NONE);
4432 genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
4433 addrReg2 = genRegMask(op2->gtRegNum);
// Keep op1 addressable across op2's computation (op2 may have spilled
// pieces of op1's address mode).
4434 addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
4435 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4436 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4438 /* Compare against the register */
4440 inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
4446 // op1 has been made addressable and is marked as in use
4447 // op2 is un-generated
4448 assert(addrReg2 == 0);
// Small-typed op1 not in a register: load it with a sign/zero extending
// move into a scratch register so the compare sizes match.
4450 if ((op1->InReg()) == 0)
4452 regNumber reg1 = regSet.rsPickReg();
4454 noway_assert(varTypeIsSmall(op1->TypeGet()));
4455 instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->InReg()) != 0);
4457 // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
4458 // so we need to make sure it is still valid. Note that at this point, reg1 is
4459 // *not* marked as in use, and it is possible for it to be used in the address
4460 // mode expression, but that is OK, because we are done with expression after
4461 // this. We only need reg1.
4462 addrReg1 = genKeepAddressable(op1, addrReg1)
4463 inst_RV_TT(ins, reg1, op1);
4464 regTracker.rsTrackRegTrash(reg1);
4466 genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4469 genMarkTreeInReg(op1, reg1);
4471 regSet.rsMarkRegUsed(op1);
4472 addrReg1 = genRegMask(op1->gtRegNum);
4475 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4476 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4481 // We come here if op2 is not enregistered or not in a "good" register.
4483 assert(addrReg1 == 0);
4485 // Determine what registers go live between op1 and op2
4486 newLiveMask = genNewLiveRegMask(op1, op2);
4488 // Setup regNeed with the set of register that we suggest for op1 to be in
4490 regNeed = RBM_ALLINT;
4492 // avoid selecting registers that get newly born in op2
4493 regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
4495 // avoid selecting op2 reserved regs
4496 regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
4498 #if CPU_HAS_BYTE_REGS
4499 // if necessary setup regNeed to select just the byte-able registers
4501 regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
4502 #endif // CPU_HAS_BYTE_REGS
4504 // Compute the first comparand into some register, regNeed here is simply a hint because RegSet::ANY_REG is used.
4506 genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
4507 noway_assert(op1->InReg());
4509 op1Reg = op1->gtRegNum;
4511 // Setup regNeed with the set of register that we require for op1 to be in
4513 regNeed = RBM_ALLINT;
4515 #if CPU_HAS_BYTE_REGS
4516 // if necessary setup regNeed to select just the byte-able registers
4518 regNeed &= RBM_BYTE_REGS;
4519 #endif // CPU_HAS_BYTE_REGS
4521 // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
4522 regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
4524 // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
4525 regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
4527 // Did we end up in an acceptable register?
4528 // and do we have an acceptable free register available to grab?
4530 if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
4532 // Grab an acceptable register
4533 regNumber newReg = regSet.rsGrabReg(regNeed);
4535 noway_assert(op1Reg != newReg);
4537 /* Update the value in the target register */
4539 regTracker.rsTrackRegCopy(newReg, op1Reg);
4541 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
4543 /* The value has been transferred to 'reg' */
// GC bookkeeping: the old register no longer holds the pointer value,
// the new one does.
4545 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
4546 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
4548 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
4550 /* The value is now in an appropriate register */
4552 op1->gtRegNum = newReg;
4554 noway_assert(op1->InReg());
4555 op1Reg = op1->gtRegNum;
4559 /* Mark the register as 'used' */
4560 regSet.rsMarkRegUsed(op1);
4562 addrReg1 = genRegMask(op1Reg);
4564 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4565 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4569 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4570 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4571 noway_assert(op1->InReg());
4573 // Setup regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset
4574 // when byteCmp is true we will perform a byte sized cmp instruction
4575 // and that instruction requires that any registers used are byte-able ones.
4577 regNeed = RBM_ALLINT;
4579 #if CPU_HAS_BYTE_REGS
4580 // if necessary setup regNeed to select just the byte-able registers
4582 regNeed &= RBM_BYTE_REGS;
4583 #endif // CPU_HAS_BYTE_REGS
4585 /* Make the comparand addressable */
4586 assert(addrReg2 == 0);
4587 addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
4589 /* Make sure the first operand is still in a register; if
4590 it's been spilled, we have to make sure it's reloaded
4591 into a byte-addressable register if needed.
4592 Pass keepReg=RegSet::KEEP_REG. Otherwise get pointer lifetimes wrong.
4595 assert(addrReg1 != 0);
4596 genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
4598 noway_assert(op1->InReg());
4599 noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
4601 addrReg1 = genRegMask(op1->gtRegNum);
// Lock op1's register so recovering/keeping op2 below cannot spill it.
4602 regSet.rsLockUsedReg(addrReg1);
4604 /* Make sure that op2 is addressable. If we are going to do a
4605 byte-comparison, we need it to be in a byte register. */
4607 if (byteCmp && (op2->InReg()))
4609 genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
4610 addrReg2 = genRegMask(op2->gtRegNum);
4614 addrReg2 = genKeepAddressable(op2, addrReg2);
4617 regSet.rsUnlockUsedReg(addrReg1);
4619 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4620 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4622 if (byteCmp || shortCmp)
4624 size = emitTypeSize(op2->TypeGet());
4625 if (varTypeIsUnsigned(op1Type))
4630 size = emitActualTypeSize(op2->TypeGet());
4633 /* Perform the comparison */
4634 inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
4638 jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
// Common exit: update liveness, release both operands' registers, and
// return the chosen jump kind (asserted valid).
4640 DONE_FLAGS: // We have determined what jumpKind to use
4642 genUpdateLife(cond);
4644 /* The condition value is dead at the jump that follows */
4646 assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
4647 assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
4648 genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
4649 genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
4651 noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
4656 /*****************************************************************************/
4657 /*****************************************************************************/
4658 /*****************************************************************************
4660 * Generate code to jump to the jump target of the current basic block if
4661 * the given relational operator yields 'true'.
// Generate a conditional jump for relational node 'cond'.  Jumps to
// destTrue when the condition holds, destFalse otherwise; when the
// destinations are not supplied (elided branch), they default to the
// current BBJ_COND block's jump target / fall-through successor.
// Dispatches on the comparand type: int/ref via genCondSetFlags + jcc,
// long via genCondJumpLng, float via genCondJumpFloat (with x87
// stack-FP variants under FEATURE_STACK_FP_X87).
4664 void CodeGen::genCondJump(GenTree* cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
4666 BasicBlock* jumpTrue;
4667 BasicBlock* jumpFalse;
4669 GenTree* op1 = cond->gtOp.gtOp1;
4670 GenTree* op2 = cond->gtOp.gtOp2;
4671 genTreeOps cmp = cond->OperGet();
4675 jumpTrue = destTrue;
4676 jumpFalse = destFalse;
// No explicit destinations: use the current basic block's conditional
// targets.
4680 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
4682 jumpTrue = compiler->compCurBB->bbJumpDest;
4683 jumpFalse = compiler->compCurBB->bbNext;
4686 noway_assert(cond->OperIsCompare());
4688 /* Make sure the more expensive operand is 'op1' */
4689 noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
4691 if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
4693 /* Don't forget to modify the condition as well */
4695 cond->gtOp.gtOp1 = op2;
4696 cond->gtOp.gtOp2 = op1;
4697 cond->SetOper(GenTree::SwapRelop(cmp));
4698 cond->gtFlags &= ~GTF_REVERSE_OPS;
4700 /* Get hold of the new values */
4702 cmp = cond->OperGet();
4703 op1 = cond->gtOp.gtOp1;
4704 op2 = cond->gtOp.gtOp2;
4707 /* What is the type of the operand? */
4709 switch (genActualType(op1->gtType))
// int/ref case: set (or reuse) EFLAGS, then emit the conditional jump.
4714 emitJumpKind jumpKind;
4716 // Check if we can use the currently set flags. Else set them
4718 jumpKind = genCondSetFlags(cond);
4720 #if FEATURE_STACK_FP_X87
4723 genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
4728 /* Generate the conditional jump */
4729 inst_JMP(jumpKind, jumpTrue);
// long case: delegated to the 64-bit pairwise-compare routine.
4735 #if FEATURE_STACK_FP_X87
4738 genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
4743 genCondJumpLng(cond, jumpTrue, jumpFalse);
// float/double case.
4749 #if FEATURE_STACK_FP_X87
4750 genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
4752 genCondJumpFloat(cond, jumpTrue, jumpFalse);
// default: dump the offending tree (debug) and fail.
4758 compiler->gtDispTree(cond);
4760 unreached(); // unexpected/unsupported 'jtrue' operands type
4764 /*****************************************************************************
4765 * Spill registers to check callers can handle it.
// JIT stress helper: under register-stress mode (rsStressRegs() >= 2),
// spill as many in-use registers as possible and clobber the free ones
// with zero, so that callers that wrongly assume register contents
// survive a genCodeForTree() call are flushed out.  'tree' identifies a
// value (and, for simple TYP_INT nodes, its operands) whose registers
// must be preserved.  No-op outside stress mode.
4770 void CodeGen::genStressRegs(GenTree* tree)
4772 if (regSet.rsStressRegs() < 2)
4775 /* Spill as many registers as possible. Callers should be prepared
4776 to handle this case.
4777 But don't spill trees with no size (TYP_STRUCT comes to mind) */
4780 regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
4784 for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
4786 if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
4787 (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
4789 regSet.rsSpillReg(regNum);
// Spilling one register can release others (multi-reg trees); refresh
// the candidate set from the current used mask.
4791 spillRegs &= regSet.rsMaskUsed;
// Now compute the set of free registers we are allowed to trash.
4799 regMaskTP trashRegs = regSet.rsRegMaskFree();
4801 if (trashRegs == RBM_NONE)
4804 /* It is sometimes reasonable to expect that calling genCodeForTree()
4805 on certain trees won't spill anything */
// First statement of a catch/filter handler block: the incoming
// exception object register must survive.
4807 if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
4808 handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
4810 trashRegs &= ~(RBM_EXCEPTION_OBJECT);
4813 // If genCodeForTree() effectively gets called a second time on the same tree
4817 noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
4818 trashRegs &= ~genRegMask(tree->gtRegNum);
// Also protect enregistered operands of a simple int node.
4821 if (tree->gtType == TYP_INT && tree->OperIsSimple())
4823 GenTree* op1 = tree->gtOp.gtOp1;
4824 GenTree* op2 = tree->gtOp.gtOp2;
4825 if (op1 && (op1->InReg()))
4826 trashRegs &= ~genRegMask(op1->gtRegNum);
4827 if (op2 && (op2->InReg()))
4828 trashRegs &= ~genRegMask(op2->gtRegNum);
// In the return block of a P/Invoke method, keep the enregistered frame
// list root variable intact.
4831 if (compiler->compCurBB == compiler->genReturnBB)
4833 if (compiler->info.compCallUnmanaged)
4835 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
4836 if (varDsc->lvRegister)
4837 trashRegs &= ~genRegMask(varDsc->lvRegNum);
4841 /* Now trash the registers. We use regSet.rsModifiedRegsMask, else we will have
4842 to save/restore the register. We try to be as unintrusive
4845 noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
4846 // This is obviously false for ARM, but this function is never called.
4847 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
4849 regMaskTP regMask = genRegMask(reg);
// Only zero registers already marked modified, so no extra save/restore
// is forced in the prolog/epilog.
4851 if (regSet.rsRegsModified(regMask & trashRegs))
4852 genSetRegToIcon(reg, 0);
4858 /*****************************************************************************
4860 * Generate code for a GTK_CONST tree
// Generate code for a GTK_CONST node (an integer/handle constant):
// materialize the constant into a register chosen from destReg/bestReg
// and finish via genCodeForTree_DONE.  Where REDUNDANT_LOAD tracking
// applies, an existing register already holding the same constant is
// reused instead of emitting a new load.
4863 void CodeGen::genCodeForTreeConst(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
4865 noway_assert(tree->IsCnsIntOrI());
4866 GenTreeIntConCommon* con = tree->AsIntConCommon();
4867 ssize_t ival = con->IconValue();
// Handles (method/class/field/etc.) need a relocation on the immediate.
4868 bool needReloc = con->ImmedValNeedsReloc(compiler);
4869 regMaskTP needReg = destReg;
4874 /* If we are targeting destReg and ival is zero */
4875 /* we would rather xor needReg than copy another register */
// Decide whether reusing a register that already contains 'ival' is
// preferable to re-materializing it.
4879 bool reuseConstantInReg = false;
4881 if (destReg == RBM_NONE)
4882 reuseConstantInReg = true;
4885 // If we can set a register to a constant with a small encoding, then do that.
4886 // Assume we'll get a low register if needReg has low registers as options.
4887 if (!reuseConstantInReg &&
4888 !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
4890 reuseConstantInReg = true;
// Non-ARM path (elided #else): reuse for any non-zero value; zero is
// cheap to regenerate with xor.
4893 if (!reuseConstantInReg && ival != 0)
4894 reuseConstantInReg = true;
4897 if (reuseConstantInReg)
4899 /* Is the constant already in register? If so, use this register */
4901 reg = regTracker.rsIconIsInReg(ival);
4907 #endif // REDUNDANT_LOAD
4909 reg = regSet.rsPickReg(needReg, bestReg);
4911 /* If the constant is a handle, we need a reloc to be applied to it */
4915 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
4916 regTracker.rsTrackRegTrash(reg);
4920 genSetRegToIcon(reg, ival, tree->TypeGet());
4926 /* Special case: GT_CNS_INT - Restore the current live set if it was changed */
4928 if (!genTempLiveChg)
4930 VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
4931 genTempLiveChg = true;
4935 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings)
4936 genCodeForTree_DONE(tree, reg);
4939 /*****************************************************************************
4941 * Generate code for a GTK_LEAF tree
// Generate code for a GTK_LEAF node, loading its value into a register
// selected from destReg/bestReg and finishing via genCodeForTree_DONE.
// The body is a switch over the leaf oper (the 'case' lines are elided
// in this excerpt): local vars, local fields, class vars, EH-related
// leaves (end-of-finally, catch arg), GT_JMP, and memory barriers.
4944 void CodeGen::genCodeForTreeLeaf(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
4946 genTreeOps oper = tree->OperGet();
4947 regNumber reg = DUMMY_INIT(REG_CORRUPT);
4948 regMaskTP regs = regSet.rsMaskUsed;
4949 regMaskTP needReg = destReg;
4952 noway_assert(tree->OperKind() & GTK_LEAF);
// GT_REG_VAR leaves are handled by the caller; reaching here is a bug.
4957 NO_WAY("GT_REG_VAR should have been caught above");
// --- GT_LCL_VAR ---
4962 /* Does the variable live in a register? */
4964 if (genMarkLclVar(tree))
4966 genCodeForTree_REG_VAR1(tree);
// Stack-homed local: maybe a register still caches its value
// (redundant-load elimination).
4972 /* Is the local variable already in register? */
4974 reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
4978 /* Use the register the variable happens to be in */
4979 regMaskTP regMask = genRegMask(reg);
4981 // If the register that it was in isn't one of the needRegs
4982 // then try to move it into a needReg register
4984 if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
4986 regNumber rg2 = reg;
4987 reg = regSet.rsPickReg(needReg, bestReg);
4990 regMask = genRegMask(reg);
4991 inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
4995 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
4996 regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
// --- GT_LCL_FLD ---
5005 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
5006 // to worry about it being enregistered.
5007 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
// Shared load path (local field / class var): pick a register and load,
// widening small types to TYP_INT with a sign/zero-extending move.
5014 /* Pick a register for the value */
5016 reg = regSet.rsPickReg(needReg, bestReg);
5018 /* Load the variable into the register */
5020 size = genTypeSize(tree->gtType);
5022 if (size < EA_4BYTE)
5024 instruction ins = ins_Move_Extend(tree->TypeGet(), tree->InReg());
5025 inst_RV_TT(ins, reg, tree, 0);
5027 /* We've now "promoted" the tree-node to TYP_INT */
5029 tree->gtType = TYP_INT;
5033 inst_RV_TT(INS_mov, reg, tree, 0);
5036 regTracker.rsTrackRegTrash(reg);
5038 gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
5043 regTracker.rsTrackRegClsVar(reg, tree);
5046 regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
5051 noway_assert(!"Unexpected oper");
// Volatile loads require a trailing barrier.
5055 if (tree->gtFlags & GTF_IND_VOLATILE)
5057 // Emit a memory barrier instruction after the load
5058 instGen_MemoryBarrier();
// --- end-of-finally bookkeeping (x86-style EH only) ---
5069 #if !FEATURE_EH_FUNCLETS
5072 /* Have to clear the shadowSP of the nesting level which
5073 encloses the finally */
5075 unsigned finallyNesting;
5076 finallyNesting = (unsigned)tree->gtVal.gtVal1;
5077 noway_assert(tree->gtVal.gtVal1 <
5078 compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
5079 noway_assert(finallyNesting < compiler->compHndBBtabCount);
5081 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
5082 unsigned filterEndOffsetSlotOffs;
5083 PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
5084 TARGET_POINTER_SIZE); // below doesn't underflow.
5085 filterEndOffsetSlotOffs =
5086 (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
5088 unsigned curNestingSlotOffs;
5089 curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
// Zero this nesting level's shadow-SP slot to mark the finally as done.
5090 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
5093 #endif // !FEATURE_EH_FUNCLETS
// --- catch argument: the exception object arrives in a fixed register ---
5097 noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
5099 /* Catch arguments get passed in a register. genCodeForBBlist()
5100 would have marked it as holding a GC object, but not used. */
5102 noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
5103 reg = REG_EXCEPTION_OBJECT;
// --- GT_JMP: delegated to its own helper ---
5107 genCodeForTreeLeaf_GT_JMP(tree);
5110 case GT_MEMORYBARRIER:
5111 // Emit the memory barrier instruction
5112 instGen_MemoryBarrier();
// default: dump the tree (debug) and fail.
5118 compiler->gtDispTree(tree);
5120 noway_assert(!"unexpected leaf");
5123 noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
5124 genCodeForTree_DONE(tree, reg);
// Walks a chain of GT_COMMA nodes, evaluating each left-hand operand
// purely for its side effects (and recording any GC pointer it leaves in
// a register), then descends into the right-hand operand.  Returns the
// first non-comma node of the chain (the final 'tree' — the return
// statement is elided from this excerpt).
5127 GenTree* CodeGen::genCodeForCommaTree(GenTree* tree)
5129 while (tree->OperGet() == GT_COMMA)
5131 GenTree* op1 = tree->gtOp.gtOp1;
5132 genEvalSideEffects(op1);
5133 gcInfo.gcMarkRegPtrVal(op1);
5135 tree = tree->gtOp.gtOp2;
5140 /*****************************************************************************
5142 * Generate code for the a leaf node of type GT_JMP
// Generate code for a GT_JMP leaf (a tail-jump to another method taking the
// same arguments).  Responsibilities visible here:
//   1) fire the profiler tail-call callback if profiling is enabled,
//   2) run the PInvoke method epilog if this method calls unmanaged code,
//   3) restore every argument to its original home location: enregistered
//      stack args are stored back to the stack, register args are reloaded
//      from their stack homes, and (for varargs) any remaining argument
//      registers are refilled from the incoming argument area.
5145 void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTree* tree)
// The block must have been flagged as containing a GT_JMP.
5147 noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
5149 #ifdef PROFILING_SUPPORTED
5150 if (compiler->compIsProfilerHookNeeded())
5152 /* fire the event at the call site */
// Remember the stack level so it can be restored after the callout.
5153 unsigned saveStackLvl2 = genStackLevel;
5155 compiler->info.compProfilerCallback = true;
5159 // Push the profilerHandle
// Save any argument registers currently holding live values; the helper
// call below may trash them.  byref/noref masks record their GC-ness.
5161 regMaskTP byrefPushedRegs;
5162 regMaskTP norefPushedRegs;
5163 regMaskTP pushedArgRegs =
5164 genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
// The profiler method handle may be stored indirectly (push [addr]) or be
// an immediate value (push imm).
5167 if (compiler->compProfilerMethHndIndirected)
5169 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
5170 (ssize_t)compiler->compProfilerMethHnd);
5174 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
// Call the tail-call profiler helper; the handle pushed above is its
// single stack argument.
5178 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5179 sizeof(int) * 1, // argSize
5180 EA_UNKNOWN); // retSize
5183 // Adjust the number of stack slots used by this managed method if necessary.
5185 if (compiler->fgPtrArgCntMax < 1)
5187 JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
5188 compiler->fgPtrArgCntMax = 1;
5191 genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
5193 // For GT_JMP nodes we have added r0 as a used register, when under arm profiler, to evaluate GT_JMP node.
5194 // To emit tailcall callback we need r0 to pass profiler handle. Any free register could be used as call target.
5195 regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
5196 noway_assert(argReg == REG_PROFILER_JMP_ARG);
// Keep the profiler-argument register pinned across the helper call.
5197 regSet.rsLockReg(RBM_PROFILER_JMP_USED);
5199 if (compiler->compProfilerMethHndIndirected)
5201 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
5202 regTracker.rsTrackRegTrash(argReg);
5206 instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
5209 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
5211 EA_UNKNOWN); // retSize
5213 regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
// NOTE(review): presumably reached only on targets other than x86/ARM —
// the surrounding #ifdef arms are not visible here; confirm before editing.
5215 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
5216 #endif //_TARGET_X86_
5218 /* Restore the stack level */
5219 SetStackLevel(saveStackLvl2);
5221 #endif // PROFILING_SUPPORTED
5223 /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to
5224 * call genPInvokeMethodEpilog anywhere that we have a method return. We should really
5225 * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
5228 if (compiler->info.compCallUnmanaged)
5230 genPInvokeMethodEpilog();
5233 // Make sure register arguments are in their initial registers
5234 // and stack arguments are put back as well.
5236 // This does not deal with circular dependencies of register
5237 // arguments, which is safe because RegAlloc prevents that by
5238 // not enregistering any RegArgs when a JMP opcode is used.
// No incoming arguments: nothing needs to be moved back.
5240 if (compiler->info.compArgsCount == 0)
5248 // First move any enregistered stack arguments back to the stack
5249 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
5251 noway_assert(varDsc->lvIsParam);
// Skip register args and args that were never enregistered; only a
// stack-passed arg that currently lives in a register needs a store.
5252 if (varDsc->lvIsRegArg || !varDsc->lvRegister)
5255 /* Argument was passed on the stack, but ended up in a register
5256 * Store it back to the stack */
5257 CLANG_FORMAT_COMMENT_ANCHOR;
5259 #ifndef _TARGET_64BIT_
5260 if (varDsc->TypeGet() == TYP_LONG)
5262 /* long - at least the low half must be enregistered */
5264 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
5266 /* Is the upper half also enregistered? */
5268 if (varDsc->lvOtherReg != REG_STK)
5270 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
5274 #endif // _TARGET_64BIT_
5276 getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
// Tracks which argument registers get refilled with fixed (declared)
// arguments below, so the vararg reload at the end won't clobber them.
5282 regMaskTP fixedArgsMask = RBM_NONE;
5285 // Next move any un-enregistered register arguments back to their register
5286 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
5288 /* Is this variable a register arg? */
5290 if (!varDsc->lvIsRegArg)
5293 /* Register argument */
5295 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
5296 noway_assert(!varDsc->lvRegister);
5298 /* Reload it from the stack */
5299 CLANG_FORMAT_COMMENT_ANCHOR;
5301 #ifndef _TARGET_64BIT_
5302 if (varDsc->TypeGet() == TYP_LONG)
5304 /* long - at least the low half must be enregistered */
5306 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
5307 regTracker.rsTrackRegTrash(varDsc->lvArgReg);
5309 /* Also assume the upper half also enregistered */
5311 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
5313 regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
// Both halves of the long occupy fixed argument registers.
5316 fixedArgsMask |= genRegMask(varDsc->lvArgReg);
5317 fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
5321 #endif // _TARGET_64BIT_
// ARM: HFA (homogeneous float aggregate) args span consecutive FP regs;
// reload each element from the arg's stack home.
5323 if (varDsc->lvIsHfaRegArg())
5325 const var_types elemType = varDsc->GetHfaType();
5326 const instruction loadOp = ins_Load(elemType);
5327 const emitAttr size = emitTypeSize(elemType);
5328 regNumber argReg = varDsc->lvArgReg;
// Clamp to the registers actually available after argReg.
5329 const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
5331 for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5333 getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
5334 assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
5335 argReg = regNextOfType(argReg, elemType);
// ARM: structs passed (partially) in integer arg registers — reload one
// REGSIZE_BYTES-padded slot at a time.
5338 else if (varDsc->TypeGet() == TYP_STRUCT)
5340 const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes
5341 const instruction loadOp = ins_Load(elemType);
5342 const emitAttr size = emitTypeSize(elemType);
5343 regNumber argReg = varDsc->lvArgReg;
5344 const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
5346 for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
5348 getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
5349 regTracker.rsTrackRegTrash(argReg);
5351 fixedArgsMask |= genRegMask(argReg);
5353 argReg = genRegArgNext(argReg);
5357 #endif //_TARGET_ARM_
// Common case: reload a scalar register argument from its stack home.
5359 var_types loadType = varDsc->TypeGet();
5360 regNumber argReg = varDsc->lvArgReg; // incoming arg register
5361 bool twoParts = false;
// Varargs pass floating-point values in integer registers; a double then
// occupies two consecutive integer arg registers on 32-bit targets.
5363 if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
5365 #ifndef _TARGET_64BIT_
5366 if (loadType == TYP_DOUBLE)
5369 loadType = TYP_I_IMPL;
5370 assert(isValidIntArgReg(argReg));
5373 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
5374 regTracker.rsTrackRegTrash(argReg);
5377 fixedArgsMask |= genRegMask(argReg);
// Second half of a two-register (double) vararg.
5381 argReg = genRegArgNext(argReg);
5382 assert(isValidIntArgReg(argReg));
5384 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
5385 regTracker.rsTrackRegTrash(argReg);
5388 fixedArgsMask |= genRegMask(argReg);
5395 // Check if we have any non-fixed args possibly in the arg registers.
5396 if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
// The varargs handle argument must live on the frame so we can address
// the incoming variable arguments relative to it.
5398 noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
5400 regNumber regDeclArgs = REG_ARG_FIRST;
5402 // Skip the 'this' pointer.
5403 if (!compiler->info.compIsStatic)
5405 regDeclArgs = REG_NEXT(regDeclArgs);
5408 // Skip the 'generic context.'
5409 if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
5411 regDeclArgs = REG_NEXT(regDeclArgs);
5414 // Skip any 'return buffer arg.'
5415 if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
5417 regDeclArgs = REG_NEXT(regDeclArgs);
5420 // Skip the 'vararg cookie.'
5421 regDeclArgs = REG_NEXT(regDeclArgs);
5423 // Also add offset for the vararg cookie.
5424 int offset = REGSIZE_BYTES;
5426 // Load all the variable arguments in registers back to their registers.
5427 for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
// Only refill registers not already holding a fixed argument.
5429 if (!(fixedArgsMask & genRegMask(reg)))
5431 getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
5432 regTracker.rsTrackRegTrash(reg);
5434 offset += REGSIZE_BYTES;
5437 #endif // _TARGET_ARM_
5440 /*****************************************************************************
5442 * Check if a variable is assigned to in a tree. The variable number is
5443 * passed in pCallBackData. If the variable is assigned to, return
5444 * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE.
// Tree-walk callback: aborts the walk (returning WALK_ABORT) as soon as it
// sees an assignment whose destination is the local variable whose number
// was smuggled through data->pCallbackData as an integer; otherwise the
// walk continues.
5446 Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTree** pTree, Compiler::fgWalkData* data)
5448 GenTree* tree = *pTree;
// Match: any assignment oper whose LHS is a GT_LCL_VAR with the target lclNum.
// pCallbackData carries the variable number, cast through size_t.
5449 if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
5450 (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
5452 return Compiler::WALK_ABORT;
5455 return Compiler::WALK_CONTINUE;
// If 'tree' is a GT_LCL_VAR that is enregistered in an integer (non-float)
// register, return that register; callers treat a REG_NA result as "not an
// enregistered integer variable".
5458 regNumber CodeGen::genIsEnregisteredIntVariable(GenTree* tree)
5463 if (tree->gtOper == GT_LCL_VAR)
5465 /* Does the variable live in a register? */
5467 varNum = tree->gtLclVarCommon.gtLclNum;
5468 noway_assert(varNum < compiler->lvaCount);
5469 varDsc = compiler->lvaTable + varNum;
// Only integer enregistered variables qualify; float reg types are excluded.
5471 if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
5473 return varDsc->lvRegNum;
// Unspill any integer registers that were live (as variables, GC refs, or
// byrefs) in the saved liveness snapshot 'ls' but are not currently live.
// Used after generating one arm of a control-flow diamond (e.g. a qmark)
// so both arms rejoin with identical register states.
5481 void CodeGen::unspillLiveness(genLivenessSet* ls)
5483 // Only try to unspill the registers that are missing from the currentLiveRegs
// A register is "cannot spill" if the snapshot says it must hold a live
// value but the current state no longer has it live.
5485 regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
5486 regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
5487 cannotSpillMask &= ~currentLiveRegs;
5489 // Typically this will always be true and we will return
5491 if (cannotSpillMask == 0)
// Walk every integer register and unspill the ones flagged above.
5494 for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
5496 // Is this a register that we cannot leave in the spilled state?
5498 if ((cannotSpillMask & genRegMask(reg)) == 0)
5501 RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
5503 // Was it spilled, if not then skip it.
// The spilled tree must still be flagged as spilled before we restore it.
5508 noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
5510 regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
5514 /*****************************************************************************
5516 * Generate code for a qmark colon
// Generate code for a GT_QMARK (?:) tree.  Strategy:
//   1) try a branchless CMOV implementation (genCodeForQmarkWithCMOV);
//   2) otherwise emit a branch diamond: condition jump, 'else' arm,
//      jump over, 'then' arm, join label.
// The delicate part is keeping register/GC liveness identical on both arms:
// registers that could be spilled on only one side are pre-spilled before
// the condition, liveness is snapshotted at entry and at 'else'-exit, and
// the 'then' arm must exit with exactly the same liveness (checkLiveness).
5519 void CodeGen::genCodeForQmark(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
5521 GenTree* op1 = tree->gtOp.gtOp1;
5522 GenTree* op2 = tree->gtOp.gtOp2;
5524 regMaskTP regs = regSet.rsMaskUsed;
5525 regMaskTP needReg = destReg;
5527 noway_assert(compiler->compQmarkUsed);
5528 noway_assert(tree->gtOper == GT_QMARK);
5529 noway_assert(op1->OperIsCompare());
5530 noway_assert(op2->gtOper == GT_COLON);
5532 GenTree* thenNode = op2->AsColon()->ThenNode();
5533 GenTree* elseNode = op2->AsColon()->ElseNode();
5535 /* If elseNode is a Nop node you must reverse the
5536 thenNode and elseNode prior to reaching here!
5537 (If both 'else' and 'then' are Nops, whole qmark will have been optimized away.) */
5539 noway_assert(!elseNode->IsNothingNode());
5541 /* Try to implement the qmark colon using a CMOV. If we can't for
5542 whatever reason, this will return false and we will implement
5543 it using regular branching constructs. */
5545 if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
5549 This is a ?: operator; generate code like this:
5552 jmp_if_true lab_true
5555 op1 (false = 'else' part)
5559 op2 (true = 'then' part)
5564 NOTE: If no 'then' part we do not generate the 'jmp lab_done'
5565 or the 'lab_done' label
5568 BasicBlock* lab_true;
5569 BasicBlock* lab_false;
5570 BasicBlock* lab_done;
// Liveness snapshots: entryLiveness is taken just after the condition (both
// arms start from it); exitLiveness is taken at the end of the 'else' arm
// and the 'then' arm must match it at the join.
5572 genLivenessSet entryLiveness;
5573 genLivenessSet exitLiveness;
5575 lab_true = genCreateTempLabel();
5576 lab_false = genCreateTempLabel();
5578 #if FEATURE_STACK_FP_X87
5579 /* Spill any register that hold partial values so that the exit liveness
5580 from sides is the same */
5581 CLANG_FORMAT_COMMENT_ANCHOR;
5584 regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
5586 // spillMask should be the whole FP stack
5587 noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
5590 SpillTempsStackFP(regSet.rsMaskUsedFloat);
5591 noway_assert(regSet.rsMaskUsedFloat == 0);
5594 /* Before we generate code for qmark, we spill all the currently used registers
5595 that conflict with the registers used in the qmark tree. This is to avoid
5596 introducing spills that only occur on either the 'then' or 'else' side of
5597 the tree, but not both identically. We need to be careful with enregistered
5598 variables that are used; see below.
5601 if (regSet.rsMaskUsed)
5603 /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
5604 variable), then it may not get spilled. However, the variable may
5605 then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
5606 may get spilled from one side and not the other. So unmark regSet.rsMaskVars
5607 before spilling regSet.rsMaskUsed */
5609 regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
5610 regMaskTP rsAdditional = RBM_NONE;
5612 // For each multi-use of an enregistered variable, we need to determine if
5613 // it can get spilled inside the qmark colon. This can only happen if
5614 // its life ends somewhere in the qmark colon. We have the following
5616 // 1) Variable is dead at the end of the colon -- needs to be spilled
5617 // 2) Variable is alive at the end of the colon -- needs to be spilled
5618 // iff it is assigned to in the colon. In order to determine that, we
5619 // examine the GTF_ASG flag to see if any assignments were made in the
5620 // colon. If there are any, we need to do a tree walk to see if this
5621 // variable is the target of an assignment. This treewalk should not
5622 // happen frequently.
5623 if (rsAdditionalCandidates)
5626 if (compiler->verbose)
5628 Compiler::printTreeID(tree);
5629 printf(": Qmark-Colon additional spilling candidates are ");
5630 dspRegMask(rsAdditionalCandidates);
5635 // If any candidates are not alive at the GT_QMARK node, then they
5636 // need to be spilled
5638 const VARSET_TP& rsLiveNow(compiler->compCurLife);
5639 VARSET_TP rsLiveAfter(compiler->fgUpdateLiveSet(compiler->compCurLife, compiler->compCurLifeTree, tree));
// Only currently-live enregistered variables are of interest.
5641 VARSET_TP regVarLiveNow(VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
5643 VarSetOps::Iter iter(compiler, regVarLiveNow);
5644 unsigned varIndex = 0;
5645 while (iter.NextElem(&varIndex))
5647 // Find the variable in compiler->lvaTable
5648 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
5649 LclVarDsc* varDsc = compiler->lvaTable + varNum;
5651 #if !FEATURE_FP_REGALLOC
5652 if (varDsc->IsFloatRegType())
5656 noway_assert(varDsc->lvRegister);
// Compute the register mask the variable occupies (float vs int, and
// possibly both halves of a long pair).
5660 if (varTypeIsFloating(varDsc->TypeGet()))
5662 regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
5666 regBit = genRegMask(varDsc->lvRegNum);
5668 // For longs we may need to spill both regs
5669 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
5670 regBit |= genRegMask(varDsc->lvOtherReg);
5673 // Is it one of our reg-use vars? If not, we don't need to spill it.
5674 regBit &= rsAdditionalCandidates;
5678 // Is the variable live at the end of the colon?
5679 if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
5681 // Variable is alive at the end of the colon. Was it assigned
5682 // to inside the colon?
5684 if (!(op2->gtFlags & GTF_ASG))
// GTF_ASG says "some assignment somewhere below"; walk the colon to
// see if THIS variable is the one assigned.
5687 if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
5688 Compiler::WALK_ABORT)
5690 // Variable was assigned to, so we need to spill it.
5692 rsAdditional |= regBit;
5694 if (compiler->verbose)
5696 Compiler::printTreeID(tree);
5697 printf(": Qmark-Colon candidate ");
5700 printf(" is assigned to inside colon and will be spilled\n");
5707 // Variable is not alive at the end of the colon. We need to spill it.
5709 rsAdditional |= regBit;
5711 if (compiler->verbose)
5713 Compiler::printTreeID(tree);
5714 printf(": Qmark-Colon candidate ");
5717 printf(" is alive at end of colon and will be spilled\n");
5724 if (compiler->verbose)
5726 Compiler::printTreeID(tree);
5727 printf(": Qmark-Colon approved additional spilling candidates are ");
5728 dspRegMask(rsAdditional);
// Sanity: everything approved for additional spilling was a candidate.
5734 noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
5736 // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
5737 // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
5738 // we will have unbalanced spills and generate bad code.
5740 ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
5743 // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
5744 // 'bad' registers, causing spills. So, just force all used registers to get spilled
5745 // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
5746 // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
5747 // unspilled while generating that same tree.
5749 if (regSet.rsStressRegs() >= 1)
5751 rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
5757 // Remember which registers hold pointers. We will spill
5758 // them, but the code that follows will fetch reg vars from
5759 // the registers, so we need that gc compiler->info.
5760 regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
5761 regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
5763 // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
5764 // So, pretend there aren't any, and spill them anyway. This will only occur
5765 // if rsAdditional is non-empty.
5766 regMaskTP rsTemp = regSet.rsMaskVars;
5767 regSet.ClearMaskVars();
5769 regSet.rsSpillRegs(rsSpill);
5771 // Restore gc tracking masks.
5772 gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
5773 gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
5775 // Set regSet.rsMaskVars back to normal
5776 regSet.rsMaskVars = rsTemp;
5780 // Generate the conditional jump but without doing any StackFP fixups.
5781 genCondJump(op1, lab_true, lab_false, false);
5783 /* Save the current liveness, register status, and GC pointers */
5784 /* This is the liveness information upon entry */
5785 /* to both the then and else parts of the qmark */
5787 saveLiveness(&entryLiveness);
5789 /* Clear the liveness of any local variables that are dead upon */
5790 /* entry to the else part. */
5792 /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
5793 /* from the "colon or op2" liveSet */
5794 genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet)
5796 /* genCondJump() closes the current emitter block */
5798 genDefineTempLabel(lab_false);
5800 #if FEATURE_STACK_FP_X87
5803 QmarkStateStackFP tempFPState;
5804 bool bHasFPUState = !compCurFPState.IsEmpty();
5805 genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
5808 /* Does the operator yield a value? */
5810 if (tree->gtType == TYP_VOID)
5812 /* Generate the code for the else part of the qmark */
5814 genCodeForTree(elseNode, needReg, bestReg);
5816 /* The type is VOID, so we shouldn't have computed a value */
5818 noway_assert(!(elseNode->InReg()));
5820 /* Save the current liveness, register status, and GC pointers */
5821 /* This is the liveness information upon exit of the then part of the qmark */
5823 saveLiveness(&exitLiveness);
5825 /* Is there a 'then' part? */
5827 if (thenNode->IsNothingNode())
5829 #if FEATURE_STACK_FP_X87
5832 // We had FP state on entry just after the condition, so potentially, the else
5833 // node may have to do transition work.
5834 lab_done = genCreateTempLabel();
5836 /* Generate jmp lab_done */
5838 inst_JMP(EJ_jmp, lab_done);
5840 /* No 'then' - just generate the 'lab_true' label */
5842 genDefineTempLabel(lab_true);
5844 // We need to do this after defining the lab_false label
5845 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5846 genQMarkAfterThenBlockStackFP(&tempFPState);
5847 genDefineTempLabel(lab_done);
5850 #endif // FEATURE_STACK_FP_X87
5852 /* No 'then' - just generate the 'lab_true' label */
5853 genDefineTempLabel(lab_true);
5858 lab_done = genCreateTempLabel();
5860 /* Generate jmp lab_done */
5862 inst_JMP(EJ_jmp, lab_done);
5864 /* Restore the liveness that we had upon entry of the then part of the qmark */
5866 restoreLiveness(&entryLiveness);
5868 /* Clear the liveness of any local variables that are dead upon */
5869 /* entry to the then part. */
5870 genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
5872 /* Generate lab_true: */
5874 genDefineTempLabel(lab_true);
5875 #if FEATURE_STACK_FP_X87
5876 // We need to do this after defining the lab_false label
5877 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5879 /* Enter the then part - trash all registers */
5881 regTracker.rsTrackRegClr();
5883 /* Generate the code for the then part of the qmark */
5885 genCodeForTree(thenNode, needReg, bestReg);
5887 /* The type is VOID, so we shouldn't have computed a value */
5889 noway_assert(!(thenNode->InReg()));
// Bring the 'then' arm's register state back in line with the 'else' exit.
5891 unspillLiveness(&exitLiveness);
5893 /* Verify that the exit liveness information is the same for the two parts of the qmark */
5895 checkLiveness(&exitLiveness);
5896 #if FEATURE_STACK_FP_X87
5897 genQMarkAfterThenBlockStackFP(&tempFPState);
5899 /* Define the "result" label */
5901 genDefineTempLabel(lab_done);
5904 /* Join of the two branches - trash all registers */
5906 regTracker.rsTrackRegClr();
5908 /* We're just about done */
5910 genUpdateLife(tree);
5914 /* Generate code for a qmark that generates a value */
5916 /* Generate the code for the else part of the qmark */
5918 noway_assert(elseNode->IsNothingNode() == false);
5920 /* Compute the elseNode into any free register */
5921 genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
5922 noway_assert(elseNode->InReg());
5923 noway_assert(elseNode->gtRegNum != REG_NA);
5925 /* Record the chosen register */
// The 'then' arm will be forced into this exact register so both arms
// deliver the result in the same place.
5926 reg = elseNode->gtRegNum;
5927 regs = genRegMask(reg);
5929 /* Save the current liveness, register status, and GC pointers */
5930 /* This is the liveness information upon exit of the else part of the qmark */
5932 saveLiveness(&exitLiveness);
5934 /* Generate jmp lab_done */
5935 lab_done = genCreateTempLabel();
5938 // We will use this to assert we don't emit instructions if we decide not to
5940 unsigned emittedInstructions = getEmitter()->emitInsCount;
5941 bool bSkippedJump = false;
5943 // We would like to know here if the else node is really going to generate
5944 // code, as if it isn't, we're generating here a jump to the next instruction.
5945 // What you would really like is to be able to go back and remove the jump, but
5946 // we have no way of doing that right now.
5949 #if FEATURE_STACK_FP_X87
5950 !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
5952 genIsEnregisteredIntVariable(thenNode) == reg)
5955 // For the moment, fix this easy case (enregistered else node), which
5956 // is the one that happens all the time.
5958 bSkippedJump = true;
5963 inst_JMP(EJ_jmp, lab_done);
5966 /* Restore the liveness that we had upon entry of the else part of the qmark */
5968 restoreLiveness(&entryLiveness);
5970 /* Clear the liveness of any local variables that are dead upon */
5971 /* entry to the then part. */
5972 genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
5974 /* Generate lab_true: */
5975 genDefineTempLabel(lab_true);
5976 #if FEATURE_STACK_FP_X87
5979 // We need to do this after defining the lab_true label
5980 genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
5982 /* Enter the then part - trash all registers */
5984 regTracker.rsTrackRegClr();
5986 /* Generate the code for the then part of the qmark */
5988 noway_assert(thenNode->IsNothingNode() == false);
5990 /* This must place a value into the chosen register */
5991 genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
5993 noway_assert(thenNode->InReg());
5994 noway_assert(thenNode->gtRegNum == reg);
5996 unspillLiveness(&exitLiveness);
5998 /* Verify that the exit liveness information is the same for the two parts of the qmark */
5999 checkLiveness(&exitLiveness);
6000 #if FEATURE_STACK_FP_X87
6001 genQMarkAfterThenBlockStackFP(&tempFPState);
// If we decided to skip the jump, the 'then' arm must not have emitted anything.
6005 noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
6008 /* Define the "result" label */
6009 genDefineTempLabel(lab_done);
6011 /* Join of the two branches - trash all registers */
6013 regTracker.rsTrackRegClr();
6015 /* Check whether this subtree has freed up any variables */
6017 genUpdateLife(tree);
6019 genMarkTreeInReg(tree, reg);
6023 /*****************************************************************************
6025 * Generate code for a qmark colon using the CMOV instruction. It's OK
6026 * to return false when we can't easily implement it using a cmov (leading
6027 * genCodeForQmark to implement it using branches).
// Attempt a branchless qmark using CMOV (x86/x64 only).  Returns true only
// when it fully generated the qmark; on any unsupported shape it bails out
// so genCodeForQmark can fall back to the branch-diamond implementation.
6030 bool CodeGen::genCodeForQmarkWithCMOV(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
6032 #ifdef _TARGET_XARCH_
6033 GenTree* cond = tree->gtOp.gtOp1;
6034 GenTree* colon = tree->gtOp.gtOp2;
6035 // Warning: this naming of the local vars is backwards!
6036 GenTree* thenNode = colon->gtOp.gtOp1;
6037 GenTree* elseNode = colon->gtOp.gtOp2;
// alwaysNode: computed unconditionally into the result register.
// predicateNode: conditionally moved over it by the CMOV.
6038 GenTree* alwaysNode;
6039 GenTree* predicateNode;
6041 regMaskTP needReg = destReg;
6043 noway_assert(tree->gtOper == GT_QMARK);
6044 noway_assert(cond->OperIsCompare());
6045 noway_assert(colon->gtOper == GT_COLON);
// Config knob to disable CMOV generation entirely (for testing/diagnostics).
6048 if (JitConfig.JitNoCMOV())
6054 /* Can only implement CMOV on processors that support it */
6056 if (!compiler->opts.compUseCMOV)
6061 /* thenNode better be a local or a constant */
6063 if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
6068 /* elseNode better be a local or a constant or nothing */
6070 if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
6075 /* can't handle two constants here */
6077 if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
6082 /* let's not handle comparisons of non-integer types */
6084 if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
6089 /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary.
6090 The biggest constraint is that cmov doesn't take an integer argument.
6093 bool reverseCond = false;
// CMOV cannot take an immediate operand, so whichever side is the constant
// must become the unconditionally-computed value (alwaysNode).
6094 if (elseNode->OperGet() == GT_CNS_INT)
6096 // else node is a constant
6098 alwaysNode = elseNode;
6099 predicateNode = thenNode;
6104 alwaysNode = thenNode;
6105 predicateNode = elseNode;
6108 // If the live set in alwaysNode is not the same as in tree, then
6109 // the variable in predicate node dies here. This is a dangerous
6110 // case that we don't handle (genComputeReg could overwrite
6111 // the value of the variable in the predicate node).
6113 // This assert is just paranoid (we've already asserted it above)
6114 assert(predicateNode->OperGet() == GT_LCL_VAR);
6115 if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
6120 // Pass this point we are comitting to use CMOV.
6124 compiler->gtReverseCond(cond);
// Emit the compare; jumpKind tells us which condition the CMOV must test.
6127 emitJumpKind jumpKind = genCondSetFlags(cond);
6129 // Compute the always node into any free register. If it's a constant,
6130 // we need to generate the mov instruction here (otherwise genComputeReg might
6131 // modify the flags, as in xor reg,reg).
6133 if (alwaysNode->OperGet() == GT_CNS_INT)
6135 reg = regSet.rsPickReg(needReg, bestReg);
// A plain mov preserves the flags set by genCondSetFlags above.
6136 inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
6137 gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
6138 regTracker.rsTrackRegTrash(reg);
6142 genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6143 noway_assert(alwaysNode->InReg());
6144 noway_assert(alwaysNode->gtRegNum != REG_NA);
6146 // Record the chosen register
6148 reg = alwaysNode->gtRegNum;
6151 regNumber regPredicate = REG_NA;
6153 // Is predicateNode an enregistered variable?
6155 if (genMarkLclVar(predicateNode))
6157 // Variable lives in a register
6159 regPredicate = predicateNode->gtRegNum;
6164 // Checks if the variable happens to be in any of the registers
6166 regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
// Maps emitJumpKind values to the matching CMOVcc instruction; the leading
// INS_nop entries cover jump kinds with no CMOV equivalent (presumably
// EJ_NONE / unconditional jump — TODO confirm against the emitJumpKind enum).
6170 const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae,
6171 INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns,
6172 INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg};
6174 noway_assert((unsigned)jumpKind < _countof(EJtoCMOV));
6175 instruction cmov_ins = EJtoCMOV[jumpKind];
6177 noway_assert(insIsCMOV(cmov_ins));
6179 if (regPredicate != REG_NA)
6181 // regPredicate is in a register
6183 inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
6187 // regPredicate is in memory
6189 inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
6191 gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
6192 regTracker.rsTrackRegTrash(reg);
6194 genUpdateLife(alwaysNode);
6195 genUpdateLife(predicateNode);
6196 genCodeForTree_DONE_LIFE(tree, reg);
6203 #ifdef _TARGET_XARCH_
// Generate code for a GT_MUL that must use the x86 one-operand multiply
// (mul/imul), which implicitly takes one operand in EAX and writes the
// widened result to EDX:EAX.  op1 is forced into EAX, op2 is made
// addressable, EDX is freed up, and both EAX and EDX are marked trashed.
6204 void CodeGen::genCodeForMultEAX(GenTree* tree)
6206 GenTree* op1 = tree->gtOp.gtOp1;
6207 GenTree* op2 = tree->gtGetOp2();
6208 bool ovfl = tree->gtOverflow();
6209 regNumber reg = DUMMY_INIT(REG_CORRUPT);
6212 noway_assert(tree->OperGet() == GT_MUL);
6214 /* We'll evaluate 'op1' first */
// Prefer EAX for op1, but not if op2 has reserved it.
6216 regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
6218 /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
6220 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
6221 noway_assert(op1->InReg());
6223 // If op2 is a constant we need to load the constant into a register
6224 if (op2->OperKind() & GTK_CONST)
6226 genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
6227 noway_assert(op2->InReg());
6228 regSet.rsMarkRegUsed(op2);
6229 addrReg = genRegMask(op2->gtRegNum);
6233 /* Make the second operand addressable */
6234 // Try to avoid EAX.
6235 addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
6238 /* Make sure the first operand is still in a register */
6239 // op1 *must* go into EAX.
6240 genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
6241 noway_assert(op1->InReg());
6243 reg = op1->gtRegNum;
6245 // For 8 bit operations, we need to pick byte addressable registers
// Overflow-checked byte multiplies need the value in a byte-addressable
// register; copy it over if the current register doesn't qualify.
6247 if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
6249 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
6251 inst_RV_RV(INS_mov, byteReg, reg);
6253 regTracker.rsTrackRegTrash(byteReg);
6254 regSet.rsMarkRegFree(genRegMask(reg));
6257 op1->gtRegNum = reg;
6258 regSet.rsMarkRegUsed(op1);
6261 /* Make sure the operand is still addressable */
6262 addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
6264 /* Free up the operand, if it's a regvar */
6268 /* The register is about to be trashed */
6270 regTracker.rsTrackRegTrash(reg);
6272 // For overflow instructions, tree->TypeGet() is the accurate type,
6273 // and gives us the size for the operands.
6275 emitAttr opSize = emitTypeSize(tree->TypeGet());
6277 /* Compute the new value */
6279 noway_assert(op1->gtRegNum == REG_EAX);
6281 // Make sure Edx is free (unless used by op2 itself)
// The widening multiply writes EDX:EAX, so EDX must not hold anything
// live other than op2's own addressing registers.
6282 bool op2Released = false;
6284 if ((addrReg & RBM_EDX) == 0)
6286 // op2 does not use Edx, so make sure noone else does either
6287 regSet.rsGrabReg(RBM_EDX);
6289 else if (regSet.rsMaskMult & RBM_EDX)
6291 /* Edx is used by op2 and some other trees.
6292 Spill the other trees besides op2. */
6294 regSet.rsGrabReg(RBM_EDX);
6297 /* keepReg==RegSet::FREE_REG so that the other multi-used trees
6298 don't get marked as unspilled as well. */
6299 regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
// Select signed vs unsigned one-operand multiply based on GTF_UNSIGNED,
// then emit it against op2's location ('ins' is set on the elided arms).
6304 if (tree->gtFlags & GTF_UNSIGNED)
6309 inst_TT(ins, op2, 0, 0, opSize);
6311 /* Both EAX and EDX are now trashed */
6313 regTracker.rsTrackRegTrash(REG_EAX);
6314 regTracker.rsTrackRegTrash(REG_EDX);
6316 /* Free up anything that was tied up by the operand */
6319 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
6321 /* The result will be where the first operand is sitting */
6323 /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
6324 genRecoverReg(op1, 0, RegSet::KEEP_REG);
6326 reg = op1->gtRegNum;
// The one-operand multiply always leaves the low result in EAX.
6327 noway_assert(reg == REG_EAX);
6331 /* Do we need an overflow check */
6334 genCheckOverflow(tree);
6336 genCodeForTree_DONE(tree, reg);
6338 #endif // _TARGET_XARCH_
// genCodeForMult64: generate code for a GT_MUL that needs the 32x32 => 64-bit
// multiply form — either the tree is marked GTF_MUL_64RSLT (caller wants the
// full 64-bit product in a register pair) or an overflow check requires the
// high half of the product. (This lives in the _TARGET_ARM_ section of the
// file; the emitted instruction 'ins' is chosen on the elided lines following
// the GTF_UNSIGNED test — presumably umull vs. smull; confirm against the
// full source.)
//
// Arguments:
//    tree    - the GT_MUL node
//    destReg - mask of acceptable destination registers
//    bestReg - hint mask used when picking the result register(s)
6341 void CodeGen::genCodeForMult64(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
6343 GenTree* op1 = tree->gtOp.gtOp1;
6344 GenTree* op2 = tree->gtGetOp2();
6346 noway_assert(tree->OperGet() == GT_MUL);
6348 /* Generate the first operand into some register */
6350 genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6351 noway_assert(op1->InReg());
6353 /* Generate the second operand into some register */
6355 genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
6356 noway_assert(op2->InReg());
6358 /* Make sure the first operand is still in a register */
6359 genRecoverReg(op1, 0, RegSet::KEEP_REG);
6360 noway_assert(op1->InReg());
6362 /* Free up the operands */
6363 genUpdateLife(tree);
// Pick the low-half result register first, then lock it so the high-half
// register pick below cannot return the same register.
6368 regNumber regLo = regSet.rsPickReg(destReg, bestReg);
6371 regSet.rsLockReg(genRegMask(regLo));
6372 regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
6373 regSet.rsUnlockReg(genRegMask(regLo));
6376 if (tree->gtFlags & GTF_UNSIGNED)
// Emit the 4-register multiply: regLo/regHi receive the 64-bit product of
// op1 and op2. Both halves are trashed from the tracker's point of view.
6381 getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
6382 regTracker.rsTrackRegTrash(regHi);
6383 regTracker.rsTrackRegTrash(regLo);
6385 /* Do we need an overflow check */
6387 if (tree->gtOverflow())
6389 // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
6391 regSet.rsLockReg(genRegMask(regLo));
6393 if (tree->gtFlags & GTF_MUL_64RSLT)
6394 regSet.rsLockReg(genRegMask(regHi));
// For a signed multiply the result overflows unless the high half is the
// sign-extension of the low half; the cmp/adc pair below folds that test
// into regTmpHi so a single compare-to-zero decides the overflow branch.
6396 regNumber regTmpHi = regHi;
6397 if ((tree->gtFlags & GTF_UNSIGNED) == 0)
6399 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
6400 regTmpHi = regSet.rsPickReg(RBM_ALLINT);
6401 getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
6402 regTracker.rsTrackRegTrash(regTmpHi);
6404 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
6406 // Jump to the block which will throw the exception
6407 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
6408 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
6410 // Unlock regLo [and regHi] after generating code for the gtOverflow() case
6412 regSet.rsUnlockReg(genRegMask(regLo));
6414 if (tree->gtFlags & GTF_MUL_64RSLT)
6415 regSet.rsUnlockReg(genRegMask(regHi));
6418 genUpdateLife(tree);
// Publish the result: a register pair when the caller asked for the full
// 64-bit product, otherwise just the low 32 bits.
6420 if (tree->gtFlags & GTF_MUL_64RSLT)
6421 genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
6423 genMarkTreeInReg(tree, regLo);
6425 #endif // _TARGET_ARM_
6427 /*****************************************************************************
6429 * Generate code for a simple binary arithmetic or logical operator.
6430 * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
// genCodeForTreeSmpBinArithLogOp: generate code for a simple binary arithmetic
// or logical operator — GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
//
// Arguments:
//    tree    - the operator node (op1 = tree->gtOp.gtOp1, op2 = tree->gtGetOp2())
//    destReg - mask of preferred/required destination registers
//    bestReg - hint mask used when picking the result register
//
// Fast paths tried in order: 3-operand "imul reg, op1, icon" (x86/x64),
// computing the whole tree with a single 'lea' when no overflow check is
// needed, a masked-AND special case for small types, and the EAX-implicit
// or 64-bit-multiply helpers. Otherwise op1 is computed into a register,
// op2 made addressable, and the two combined with 'ins'.
//
// Fix: the '&regs' out-argument passed to genMakeIndAddrMode had been
// corrupted into a mojibake sequence; the address-of expression is restored.
6433 void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
6436 genTreeOps oper = tree->OperGet();
6437 const var_types treeType = tree->TypeGet();
6438 GenTree* op1 = tree->gtOp.gtOp1;
6439 GenTree* op2 = tree->gtGetOp2();
6440 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
6441 regNumber reg = DUMMY_INIT(REG_CORRUPT);
6442 regMaskTP needReg = destReg;
6444 /* Figure out what instruction to generate */
6477 #ifdef _TARGET_XARCH_
6478 /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
6480 if ((oper == GT_MUL) &&
6481 op2->IsIntCnsFitsInI32() && // op2 is a constant that fits in a sign-extended 32-bit immediate
6482 !op1->IsCnsIntOrI() && // op1 is not a constant
6483 (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
6484 !varTypeIsByte(treeType) && // No encoding for say "imul al,al,imm"
6485 !tree->gtOverflow()) // 3 operand imul doesn't set flags
6487 /* Make the first operand addressable */
6489 regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
6491 /* Grab a register for the target */
6493 reg = regSet.rsPickReg(needReg, bestReg);
6496 /* Compute the value into the target: reg=op1*op2_icon */
// Multipliers 3, 5 and 9 can be done with a single scaled 'lea'
// (reg = regSrc + regSrc*{2,4,8}) instead of an imul.
6497 if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
6502 regSrc = op1->gtRegNum;
6506 inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
6509 getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
6510 (op2->gtIntCon.gtIconVal & -2), 0);
6513 #endif // LEA_AVAILABLE
6515 /* Compute the value into the target: reg=op1*op2_icon */
6516 inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
6519 /* The register has been trashed now */
6521 regTracker.rsTrackRegTrash(reg);
6523 /* The address is no longer live */
6525 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
6527 genCodeForTree_DONE(tree, reg);
6530 #endif // _TARGET_XARCH_
6536 // We only reach here for GT_ADD, GT_SUB and GT_MUL.
6537 assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
6539 ovfl = tree->gtOverflow();
6541 /* We record the accurate (small) types in trees only when we need to
6542 * check for overflow. Otherwise we record genActualType()
6545 noway_assert(ovfl || (treeType == genActualType(treeType)));
6549 /* Can we use an 'lea' to compute the result?
6550 Can't use 'lea' for overflow as it doesn't set flags
6551 Can't use 'lea' unless we have at least two free registers */
6553 bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
6554 genCountBits(regSet.rsMaskLock) + // Locked registers
6555 2 // We will need two registers
6556 <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
6558 regMaskTP regs = RBM_NONE; // OUT argument
6559 if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
6563 /* Is the value now computed in some register? */
6567 genCodeForTree_REG_VAR1(tree);
6571 /* If we can reuse op1/2's register directly, and 'tree' is
6572 a simple expression (ie. not in scaled index form),
6573 might as well just use "add" instead of "lea" */
6575 // However, if we're in a context where we want to evaluate "tree" into a specific
6576 // register different from the reg we'd use in this optimization, then it doesn't
6577 // make sense to do the "add", since we'd also have to do a "mov."
6580 reg = op1->gtRegNum;
6582 if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
6586 /* Simply add op2 to the register */
6588 inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
6590 if (tree->gtSetFlags())
6591 genFlagsEqualToReg(tree, reg);
6595 else if (op2->OperGet() == GT_CNS_INT)
6597 /* Simply add op2 to the register */
6599 genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
6608 reg = op2->gtRegNum;
6610 if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
6614 /* Simply add op1 to the register */
6616 inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
6618 if (tree->gtSetFlags())
6619 genFlagsEqualToReg(tree, reg);
6626 // The expression either requires a scaled-index form, or the
6627 // op1 or op2's register can't be targeted, this can be
6628 // caused when op1 or op2 are enregistered variables.
6630 reg = regSet.rsPickReg(needReg, bestReg);
6631 size = emitActualTypeSize(treeType);
6633 /* Generate "lea reg, [addr-mode]" */
6635 inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
6637 #ifndef _TARGET_XARCH_
6638 // Don't call genFlagsEqualToReg on x86/x64
6639 // as it does not set the flags
6640 if (tree->gtSetFlags())
6641 genFlagsEqualToReg(tree, reg);
6645 /* The register has been trashed now */
6646 regTracker.rsTrackRegTrash(reg);
6648 genDoneAddressable(tree, regs, RegSet::FREE_REG);
6650 /* The following could be an 'inner' pointer!!! */
6652 noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
6654 if (treeType == TYP_BYREF)
6656 genUpdateLife(tree);
6658 gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
6659 gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
6662 genCodeForTree_DONE(tree, reg);
6667 #endif // LEA_AVAILABLE
6669 noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
6672 /* The following makes an assumption about gtSetEvalOrder(this) */
6674 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
6676 /* Compute a useful register mask */
6677 needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
6678 needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
6680 // Determine what registers go live between op1 and op2
6681 // Don't bother checking if op1 is already in a register.
6682 // This is not just for efficiency; if it's already in a
6683 // register then it may already be considered "evaluated"
6684 // for the purposes of liveness, in which genNewLiveRegMask
6688 regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
6691 needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);
6695 #if CPU_HAS_BYTE_REGS
6696 /* 8-bit operations can only be done in the byte-regs */
6697 if (varTypeIsByte(treeType))
6698 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
6699 #endif // CPU_HAS_BYTE_REGS
6701 // Try selecting one of the 'bestRegs'
6702 needReg = regSet.rsNarrowHint(needReg, bestReg);
6704 /* Special case: small_val & small_mask */
6706 if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
6708 size_t and_val = op2->gtIntCon.gtIconVal;
6710 var_types typ = op1->TypeGet();
6717 andMask = 0x000000FF;
6721 andMask = 0x0000FFFF;
6724 noway_assert(!"unexpected type");
6728 // Is the 'and_val' completely contained within the bits found in 'andMask'
6729 if ((and_val & ~andMask) == 0)
6731 // We must use unsigned instructions when loading op1
6732 if (varTypeIsByte(typ))
6734 op1->gtType = TYP_UBYTE;
6736 else // varTypeIsShort(typ)
6738 assert(varTypeIsShort(typ));
6739 op1->gtType = TYP_USHORT;
6742 /* Generate the first operand into a scratch register */
6744 op1 = genCodeForCommaTree(op1);
6745 genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6747 noway_assert(op1->InReg());
6749 regNumber op1Reg = op1->gtRegNum;
6751 // Did we end up in an acceptable register?
6752 // and do we have an acceptable free register available to grab?
6754 if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
6756 // See if we can pick a register from bestReg
6759 // Grab an acceptable register
6761 if ((bestReg & regSet.rsRegMaskFree()) != 0)
6762 newReg = regSet.rsGrabReg(bestReg);
6764 newReg = regSet.rsGrabReg(needReg);
6766 noway_assert(op1Reg != newReg);
6768 /* Update the value in the target register */
6770 regTracker.rsTrackRegCopy(newReg, op1Reg);
6772 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
6774 /* The value has been transferred to 'reg' */
6776 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
6777 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
6779 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
6781 /* The value is now in an appropriate register */
6783 op1->gtRegNum = newReg;
6785 noway_assert(op1->InReg());
6788 /* Mark the register as 'used' */
6789 regSet.rsMarkRegUsed(op1);
6790 reg = op1->gtRegNum;
6792 if (and_val != andMask) // Does the "and" mask only cover some of the bits?
6794 /* "and" the value */
6796 inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
6800 /* Update the live set of register variables */
6801 if (compiler->opts.varNames)
6802 genUpdateLife(tree);
6805 /* Now we can update the register pointer information */
6808 gcInfo.gcMarkRegPtrVal(reg, treeType);
6810 genCodeForTree_DONE_LIFE(tree, reg);
6815 #ifdef _TARGET_XARCH_
6817 // Do we have to use the special "imul" instruction
6818 // which has eax as the implicit operand ?
6820 bool multEAX = false;
6824 if (tree->gtFlags & GTF_MUL_64RSLT)
6826 /* Only multiplying with EAX will leave the 64-bit
6827 * result in EDX:EAX */
6833 if (tree->gtFlags & GTF_UNSIGNED)
6835 /* "mul reg/mem" always has EAX as default operand */
6839 else if (varTypeIsSmall(treeType))
6841 /* Only the "imul with EAX" encoding has the 'w' bit
6842 * to specify the size of the operands */
6851 noway_assert(oper == GT_MUL);
6853 return genCodeForMultEAX(tree);
6855 #endif // _TARGET_XARCH_
6859 // Do we have to use the special 32x32 => 64 bit multiply
6861 bool mult64 = false;
6865 if (tree->gtFlags & GTF_MUL_64RSLT)
6871 // We always must use the 32x32 => 64 bit multiply
6872 // to detect overflow
6879 noway_assert(oper == GT_MUL);
6881 return genCodeForMult64(tree, destReg, bestReg);
6883 #endif // _TARGET_ARM_
6885 /* Generate the first operand into a scratch register */
6887 op1 = genCodeForCommaTree(op1);
6888 genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
6890 noway_assert(op1->InReg());
6892 regNumber op1Reg = op1->gtRegNum;
6894 // Setup needReg with the set of register that we require for op1 to be in
6896 needReg = RBM_ALLINT;
6898 /* Compute a useful register mask */
6899 needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
6900 needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
6902 #if CPU_HAS_BYTE_REGS
6903 /* 8-bit operations can only be done in the byte-regs */
6904 if (varTypeIsByte(treeType))
6905 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
6906 #endif // CPU_HAS_BYTE_REGS
6908 // Did we end up in an acceptable register?
6909 // and do we have an acceptable free register available to grab?
6911 if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
6913 // See if we can pick a register from bestReg
6916 // Grab an acceptable register
6918 if ((bestReg & regSet.rsRegMaskFree()) != 0)
6919 newReg = regSet.rsGrabReg(bestReg);
6921 newReg = regSet.rsGrabReg(needReg);
6923 noway_assert(op1Reg != newReg);
6925 /* Update the value in the target register */
6927 regTracker.rsTrackRegCopy(newReg, op1Reg);
6929 inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
6931 /* The value has been transferred to 'reg' */
6933 if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
6934 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
6936 gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
6938 /* The value is now in an appropriate register */
6940 op1->gtRegNum = newReg;
6942 noway_assert(op1->InReg());
6943 op1Reg = op1->gtRegNum;
6947 /* Mark the register as 'used' */
6948 regSet.rsMarkRegUsed(op1);
6950 bool isSmallConst = false;
6953 if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
6955 isSmallConst = true;
6958 /* Make the second operand addressable */
6960 regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
6962 #if CPU_LOAD_STORE_ARCH
6963 genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
6964 #else // !CPU_LOAD_STORE_ARCH
6965 /* Is op1 spilled and op2 in a register? */
6967 if ((op1->gtFlags & GTF_SPILLED) && (op2->InReg()) && (ins != INS_sub))
// Commutative case: instead of unspilling op1 we operate directly on
// op2's register against op1's spill temp (INS_sub is excluded since
// operand order matters for subtraction).
6969 noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);
6971 // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
6972 noway_assert(op2->gtOper != GT_LCL_VAR ||
6973 varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
6975 reg = op2->gtRegNum;
6976 regMaskTP regMask = genRegMask(reg);
6978 /* Is the register holding op2 available? */
6980 if (regMask & regSet.rsMaskVars)
6985 /* Get the temp we spilled into. */
6987 TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
6989 /* For 8bit operations, we need to make sure that op2 is
6990 in a byte-addressable registers */
6992 if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
6994 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
6996 inst_RV_RV(INS_mov, byteReg, reg);
6997 regTracker.rsTrackRegTrash(byteReg);
6999 /* op2 couldn't have spilled as it was not sitting in
7000 RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
7001 noway_assert(op2->InReg());
7003 regSet.rsUnlockReg(regMask);
7004 regSet.rsMarkRegFree(regMask);
7007 regMask = genRegMask(reg);
7008 op2->gtRegNum = reg;
7009 regSet.rsMarkRegUsed(op2);
7012 inst_RV_ST(ins, reg, temp, 0, treeType);
7014 regTracker.rsTrackRegTrash(reg);
7018 compiler->tmpRlsTemp(temp);
7020 /* 'add'/'sub' set all CC flags, others only ZF */
7022 /* If we need to check overflow, for small types, the
7023 * flags can't be used as we perform the arithmetic
7024 * operation (on small registers) and then sign extend it
7026 * NOTE : If we ever don't need to sign-extend the result,
7027 * we can use the flags
7030 if (tree->gtSetFlags())
7032 genFlagsEqualToReg(tree, reg);
7035 /* The result is where the second operand is sitting. Mark result reg as free */
7036 regSet.rsMarkRegFree(genRegMask(reg));
7038 gcInfo.gcMarkRegPtrVal(reg, treeType);
7043 #endif // !CPU_LOAD_STORE_ARCH
7045 /* Make sure the first operand is still in a register */
7046 regSet.rsLockUsedReg(addrReg);
7047 genRecoverReg(op1, 0, RegSet::KEEP_REG);
7048 noway_assert(op1->InReg());
7049 regSet.rsUnlockUsedReg(addrReg);
7051 reg = op1->gtRegNum;
7053 // For 8 bit operations, we need to pick byte addressable registers
7055 if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
7057 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7059 inst_RV_RV(INS_mov, byteReg, reg);
7061 regTracker.rsTrackRegTrash(byteReg);
7062 regSet.rsMarkRegFree(genRegMask(reg));
7065 op1->gtRegNum = reg;
7066 regSet.rsMarkRegUsed(op1);
7069 /* Make sure the operand is still addressable */
7070 addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
7072 /* Free up the operand, if it's a regvar */
7076 /* The register is about to be trashed */
7078 regTracker.rsTrackRegTrash(reg);
7081 bool op2Released = false;
7083 // For overflow instructions, tree->gtType is the accurate type,
7084 // and gives us the size for the operands.
7086 emitAttr opSize = emitTypeSize(treeType);
7088 /* Compute the new value */
7090 if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
7091 #if !CPU_HAS_FP_SUPPORT
7092 && (treeType == TYP_INT || treeType == TYP_I_IMPL)
7096 ssize_t ival = op2->gtIntCon.gtIconVal;
7100 genIncRegBy(reg, ival, tree, treeType, ovfl);
7102 else if (oper == GT_SUB)
7104 if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
7105 (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
7106 // Therefore we can't use -ival.
7109 /* For unsigned overflow, we have to use INS_sub to set
7110 the flags correctly */
7112 genDecRegBy(reg, ival, tree);
7116 /* Else, we simply add the negative of the value */
7118 genIncRegBy(reg, -ival, tree, treeType, ovfl);
7121 else if (oper == GT_MUL)
7123 genMulRegBy(reg, ival, tree, treeType, ovfl);
7128 // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
7129 op2 = op2->gtEffectiveVal();
7130 if (varTypeIsByte(treeType) && op2->InReg())
7132 noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
7134 regNumber op2reg = op2->gtRegNum;
7135 regMaskTP op2regMask = genRegMask(op2reg);
7137 if (!(op2regMask & RBM_BYTE_REGS))
7139 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
7141 inst_RV_RV(INS_mov, byteReg, op2reg);
7142 regTracker.rsTrackRegTrash(byteReg);
7144 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7147 op2->gtRegNum = byteReg;
7151 inst_RV_TT(ins, reg, op2, 0, opSize, flags);
7154 /* Free up anything that was tied up by the operand */
7158 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7161 /* The result will be where the first operand is sitting */
7163 /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
7164 genRecoverReg(op1, 0, RegSet::KEEP_REG);
7166 reg = op1->gtRegNum;
7168 /* 'add'/'sub' set all CC flags, others only ZF+SF */
7170 if (tree->gtSetFlags())
7171 genFlagsEqualToReg(tree, reg);
7175 #if !CPU_LOAD_STORE_ARCH
7177 #endif // !CPU_LOAD_STORE_ARCH
7179 /* Do we need an overflow check */
7182 genCheckOverflow(tree);
7184 genCodeForTree_DONE(tree, reg);
7187 /*****************************************************************************
7189 * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
7190 * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
// genCodeForTreeSmpBinArithLogAsgOp: generate code for a simple binary
// arithmetic or logical assignment operator: x <op>= y.
// Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
//
// Arguments:
//    tree    - the assignment-operator node (op1 = target, op2 = RHS)
//    destReg - mask of preferred destination registers
//    bestReg - register-selection hint mask (received but not read on the
//              visible lines; presumably consumed by elided code — confirm
//              against the full source)
//
// The code dispatches on the shape of the target (enregistered variable,
// stack local, general address) and on whether op2 is a small constant,
// with separate paths for load/store architectures.
7193 void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
7196 const genTreeOps oper = tree->OperGet();
7197 const var_types treeType = tree->TypeGet();
7198 GenTree* op1 = tree->gtOp.gtOp1;
7199 GenTree* op2 = tree->gtGetOp2();
7200 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
7201 regNumber reg = DUMMY_INIT(REG_CORRUPT);
7202 regMaskTP needReg = destReg;
7205 /* Figure out what instruction to generate */
7238 // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
7240 ovfl = tree->gtOverflow();
7242 // We can't use += with overflow if the value cannot be changed
7243 // in case of an overflow-exception which the "+" might cause
7244 noway_assert(!ovfl ||
7245 ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));
7247 /* Do not allow overflow instructions with refs/byrefs */
7249 noway_assert(!ovfl || !varTypeIsGC(treeType));
7251 // We disallow overflow and byte-ops here as it is too much trouble
7252 noway_assert(!ovfl || !varTypeIsByte(treeType));
7254 /* Is the second operand a constant? */
7256 if (op2->IsIntCnsFitsInI32())
7258 int ival = (int)op2->gtIntCon.gtIconVal;
7260 /* What is the target of the assignment? */
7262 switch (op1->gtOper)
7268 reg = op1->gtRegVar.gtRegNum;
7270 /* No registers are needed for addressing */
7273 #if !CPU_LOAD_STORE_ARCH
7276 /* We're adding a constant to a register */
7278 if (oper == GT_ASG_ADD)
7279 genIncRegBy(reg, ival, tree, treeType, ovfl);
7280 else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
7281 ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
7283 // Therefore we can't
7286 /* For unsigned overflow, we have to use INS_sub to set
7287 the flags correctly */
7288 genDecRegBy(reg, ival, tree);
7290 genIncRegBy(reg, -ival, tree, treeType, ovfl);
7296 /* Does the variable live in a register? */
7298 if (genMarkLclVar(op1))
7305 /* Make the target addressable for load/store */
7306 addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
7308 #if !CPU_LOAD_STORE_ARCH
7309 // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
7311 /* For small types with overflow check, we need to
7312 sign/zero extend the result, so we need it in a reg */
7314 if (ovfl && genTypeSize(treeType) < sizeof(int))
7315 #endif // !CPU_LOAD_STORE_ARCH
7317 // Load op1 into a reg
7319 reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
7321 inst_RV_TT(INS_mov, reg, op1);
7323 // Issue the add/sub and the overflow check
7325 inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
7326 regTracker.rsTrackRegTrash(reg);
7330 genCheckOverflow(tree);
7333 /* Store the (sign/zero extended) result back to
7334 the stack location of the variable */
7336 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7340 #if !CPU_LOAD_STORE_ARCH
7343 /* Add/subtract the new value into/from the target */
7347 reg = op1->gtRegNum;
7351 /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
7352 if (!ovfl && (ival == 1 || ival == -1) &&
7353 !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
7355 noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
7356 if (oper == GT_ASG_SUB)
7359 ins = (ival > 0) ? INS_inc : INS_dec;
7364 inst_TT_IV(ins, op1, ival);
7367 if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
7369 if (tree->gtSetFlags())
7370 genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
7375 #endif // !CPU_LOAD_STORE_ARCH
7376 } // end switch (op1->gtOper)
7378 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7380 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7382 } // end if (op2->IsIntCnsFitsInI32())
7383 } // end if (isArith)
7385 noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
7387 /* Is the target a register or local variable? */
7389 switch (op1->gtOper)
7393 /* Does the target variable live in a register? */
7395 if (!genMarkLclVar(op1))
7402 /* Get hold of the target register */
7404 reg = op1->gtRegVar.gtRegNum;
7406 /* Make sure the target of the store is available */
7408 if (regSet.rsMaskUsed & genRegMask(reg))
7410 regSet.rsSpillReg(reg);
7413 /* Make the RHS addressable */
7415 addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
7417 /* Compute the new value into the target register */
7418 CLANG_FORMAT_COMMENT_ANCHOR;
7420 #if CPU_HAS_BYTE_REGS
7422 // Fix 383833 X86 ILGEN
7426 reg2 = op2->gtRegNum;
7433 // We can only generate a byte ADD,SUB,OR,AND operation when reg and reg2 are both BYTE registers
7434 // when op2 is in memory then reg2==REG_STK and we will need to force op2 into a register
7436 if (varTypeIsByte(treeType) &&
7437 (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
7439 // We will force op2 into a register (via sign/zero extending load)
7440 // for the cases where op2 is in memory and thus could have
7441 // an unmapped page just beyond its location
7443 if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
7445 genCodeForTree(op2, 0);
7446 assert(op2->InReg());
// The byte-sized op is widened to a 4-byte op; the code below then
// re-narrows the result (or proves narrowing unnecessary).
7449 inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
7451 bool canOmit = false;
7453 if (varTypeIsUnsigned(treeType))
7455 // When op2 is a byte sized constant we can omit the zero extend instruction
7456 if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
7461 else // treeType is signed
7463 // When op2 is a positive 7-bit or smaller constant
7464 // we can omit the sign extension sequence.
7465 if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
7473 // If reg is a byte reg then we can use a movzx/movsx instruction
7475 if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
7477 instruction extendIns = ins_Move_Extend(treeType, true);
7478 inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
7480 else // we can't encode a movzx/movsx instruction
7482 if (varTypeIsUnsigned(treeType))
7484 // otherwise, we must zero the upper 24 bits of 'reg'
7485 inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
7487 else // treeType is signed
7489 // otherwise, we must sign extend the result in the non-byteable register 'reg'
7490 // We will shift the register left 24 bits, thus putting the sign-bit into the high bit
7491 // then we do an arithmetic shift back 24 bits which propagate the sign bit correctly.
7493 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
7494 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
7500 #endif // CPU_HAS_BYTE_REGS
7502 inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
7505 /* The zero flag is now equal to the register value */
7507 if (tree->gtSetFlags())
7508 genFlagsEqualToReg(tree, reg);
7510 /* Remember that we trashed the target */
7512 regTracker.rsTrackRegTrash(reg);
7514 /* Free up anything that was tied up by the RHS */
7516 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
7518 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7523 } // end switch (op1->gtOper)
7525 #if !CPU_LOAD_STORE_ARCH
7526 /* Special case: "x ^= -1" is actually "not(x)" */
7528 if (oper == GT_ASG_XOR)
7530 if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
7532 addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
7533 inst_TT(INS_NOT, op1);
7534 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7536 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
7540 #endif // !CPU_LOAD_STORE_ARCH
7542 /* Setup target mask for op2 (byte-regs for small operands) */
7545 needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;
7547 /* Is the second operand a constant? */
7549 if (op2->IsIntCnsFitsInI32())
7551 int ival = (int)op2->gtIntCon.gtIconVal;
7553 /* Make the target addressable */
7554 addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
// Read-modify-write directly against the target's address mode.
7556 inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
7558 genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
7560 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
7564 /* Is the value or the address to be computed first? */
7566 if (tree->gtFlags & GTF_REVERSE_OPS)
7568 /* Compute the new value into a register */
7570 genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
7572 /* Make the target addressable for load/store */
7573 addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
7574 regSet.rsLockUsedReg(addrReg);
7576 #if !CPU_LOAD_STORE_ARCH
7577 // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
7578 /* For small types with overflow check, we need to
7579 sign/zero extend the result, so we need it in a reg */
7581 if (ovfl && genTypeSize(treeType) < sizeof(int))
7582 #endif // !CPU_LOAD_STORE_ARCH
7584 reg = regSet.rsPickReg();
7585 regSet.rsLockReg(genRegMask(reg));
7587 noway_assert(genIsValidReg(reg));
7589 /* Generate "ldr reg, [var]" */
7591 inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
7593 if (op1->gtOper == GT_LCL_VAR)
7594 regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum)
7596 regTracker.rsTrackRegTrash(reg);
7598 /* Make sure the new value is in a register */
7600 genRecoverReg(op2, 0, RegSet::KEEP_REG);
7602 /* Compute the new value */
7604 inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
7607 genCheckOverflow(tree);
7609 /* Move the new value back to the variable */
7610 /* Generate "str reg, [var]" */
7612 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7613 regSet.rsUnlockReg(genRegMask(reg));
7615 if (op1->gtOper == GT_LCL_VAR)
7616 regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
7618 #if !CPU_LOAD_STORE_ARCH
7621 /* Make sure the new value is in a register */
7623 genRecoverReg(op2, 0, RegSet::KEEP_REG);
7625 /* Add the new value into the target */
7627 inst_TT_RV(ins, op1, op2->gtRegNum);
7629 #endif // !CPU_LOAD_STORE_ARCH
7630 /* Free up anything that was tied up either side */
7631 regSet.rsUnlockUsedReg(addrReg);
7632 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7637 /* Make the target addressable */
7639 addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
7641 /* Compute the new value into a register */
7643 genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
7644 regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
7646 /* Make sure the target is still addressable */
7648 addrReg = genKeepAddressable(op1, addrReg);
7649 regSet.rsLockUsedReg(addrReg);
7651 #if !CPU_LOAD_STORE_ARCH
7652 // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
7654 /* For small types with overflow check, we need to
7655 sign/zero extend the result, so we need it in a reg */
7657 if (ovfl && genTypeSize(treeType) < sizeof(int))
7658 #endif // !CPU_LOAD_STORE_ARCH
7660 reg = regSet.rsPickReg();
7662 inst_RV_TT(INS_mov, reg, op1);
7664 inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
7665 regTracker.rsTrackRegTrash(reg);
7668 genCheckOverflow(tree);
7670 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
7672 if (op1->gtOper == GT_LCL_VAR)
7673 regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
7675 #if !CPU_LOAD_STORE_ARCH
7678 /* Add the new value into the target */
7680 inst_TT_RV(ins, op1, op2->gtRegNum);
7684 /* Free up anything that was tied up either side */
7685 regSet.rsUnlockUsedReg(addrReg);
7686 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
7688 regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
7692 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
7695 /*****************************************************************************
7697 * Generate code for GT_UMOD.
// genCodeForUnsignedMod: generate code for GT_UMOD.
//
// Arguments:
//    tree    - the GT_UMOD node
//    destReg - mask of preferred destination registers
//    bestReg - register-selection hint, forwarded to the general divide path
//
// When the divisor qualifies (per fgIsUnsignedModOptimizable — presumably a
// power-of-two constant, given the AND-with-(divisor-1) emitted below;
// confirm against the full source), x % d is computed as x & (d - 1).
// Otherwise falls back to genCodeForGeneralDivide.
7700 void CodeGen::genCodeForUnsignedMod(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
7702 assert(tree->OperGet() == GT_UMOD);
7704 GenTree* op1 = tree->gtOp.gtOp1;
7705 GenTree* op2 = tree->gtOp.gtOp2;
7706 const var_types treeType = tree->TypeGet();
7707 regMaskTP needReg = destReg;
7710 /* Is this a division by an integer constant? */
7713 if (compiler->fgIsUnsignedModOptimizable(op2))
7715 /* Generate the operand into some register */
7717 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7718 noway_assert(op1->InReg());
7720 reg = op1->gtRegNum;
7722 /* Generate the appropriate sequence */
// Unsigned remainder by 2^n: mask with (2^n - 1).
7723 size_t ival = op2->gtIntCon.gtIconVal - 1;
7724 inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
7726 /* The register is now trashed */
7728 regTracker.rsTrackRegTrash(reg);
7730 genCodeForTree_DONE(tree, reg);
// Non-optimizable divisor: emit the general divide/modulo sequence.
7734 genCodeForGeneralDivide(tree, destReg, bestReg);
7737 /*****************************************************************************
7739 * Generate code for GT_MOD.
7742 void CodeGen::genCodeForSignedMod(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
7744 assert(tree->OperGet() == GT_MOD);
7746 GenTree* op1 = tree->gtOp.gtOp1;
7747 GenTree* op2 = tree->gtOp.gtOp2;
7748 const var_types treeType = tree->TypeGet();
7749 regMaskTP needReg = destReg;
7752 /* Is this a division by an integer constant? */
7755 if (compiler->fgIsSignedModOptimizable(op2))
7757 ssize_t ival = op2->gtIntCon.gtIconVal;
7758 BasicBlock* skip = genCreateTempLabel();
7760 /* Generate the operand into some register */
7762 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7763 noway_assert(op1->InReg());
7765 reg = op1->gtRegNum;
7767 /* Generate the appropriate sequence */
7769 inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
7771 /* The register is now trashed */
7773 regTracker.rsTrackRegTrash(reg);
7775 /* Check and branch for a postive value */
7776 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7777 inst_JMP(jmpGEL, skip);
7779 /* Generate the rest of the sequence and we're done */
7781 genIncRegBy(reg, -1, NULL, treeType);
7783 if ((treeType == TYP_LONG) && ((int)ival != ival))
7785 regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
7786 instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
7787 inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
7791 inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
7793 genIncRegBy(reg, 1, NULL, treeType);
7795 /* Define the 'skip' label and we're done */
7797 genDefineTempLabel(skip);
7799 genCodeForTree_DONE(tree, reg);
7803 genCodeForGeneralDivide(tree, destReg, bestReg);
7806 /*****************************************************************************
7808 * Generate code for GT_UDIV.
7811 void CodeGen::genCodeForUnsignedDiv(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
7813 assert(tree->OperGet() == GT_UDIV);
7815 GenTree* op1 = tree->gtOp.gtOp1;
7816 GenTree* op2 = tree->gtOp.gtOp2;
7817 const var_types treeType = tree->TypeGet();
7818 regMaskTP needReg = destReg;
7821 /* Is this a division by an integer constant? */
7824 if (compiler->fgIsUnsignedDivOptimizable(op2))
7826 size_t ival = op2->gtIntCon.gtIconVal;
7828 /* Division by 1 must be handled elsewhere */
7830 noway_assert(ival != 1 || compiler->opts.MinOpts());
7832 /* Generate the operand into some register */
7834 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7835 noway_assert(op1->InReg());
7837 reg = op1->gtRegNum;
7839 /* Generate "shr reg, log2(value)" */
7841 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
7843 /* The register is now trashed */
7845 regTracker.rsTrackRegTrash(reg);
7847 genCodeForTree_DONE(tree, reg);
7851 genCodeForGeneralDivide(tree, destReg, bestReg);
7854 /*****************************************************************************
7856 * Generate code for GT_DIV.
7859 void CodeGen::genCodeForSignedDiv(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
7861 assert(tree->OperGet() == GT_DIV);
7863 GenTree* op1 = tree->gtOp.gtOp1;
7864 GenTree* op2 = tree->gtOp.gtOp2;
7865 const var_types treeType = tree->TypeGet();
7866 regMaskTP needReg = destReg;
7869 /* Is this a division by an integer constant? */
7872 if (compiler->fgIsSignedDivOptimizable(op2))
7874 ssize_t ival_s = op2->gtIntConCommon.IconValue();
7875 assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
7876 size_t ival = static_cast<size_t>(ival_s);
7878 /* Division by 1 must be handled elsewhere */
7880 noway_assert(ival != 1);
7882 BasicBlock* onNegDivisee = genCreateTempLabel();
7884 /* Generate the operand into some register */
7886 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
7887 noway_assert(op1->InReg());
7889 reg = op1->gtRegNum;
7893 /* Generate "sar reg, log2(value)" */
7895 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
7897 // Check and branch for a postive value, skipping the INS_ADDC instruction
7898 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7899 inst_JMP(jmpGEL, onNegDivisee);
7901 // Add the carry flag to 'reg'
7902 inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
7904 /* Define the 'onNegDivisee' label and we're done */
7906 genDefineTempLabel(onNegDivisee);
7908 /* The register is now trashed */
7910 regTracker.rsTrackRegTrash(reg);
7912 /* The result is the same as the operand */
7914 reg = op1->gtRegNum;
7918 /* Generate the following sequence */
7927 instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
7929 // Check and branch for a postive value, skipping the INS_add instruction
7930 emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
7931 inst_JMP(jmpGEL, onNegDivisee);
7933 inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
7935 /* Define the 'onNegDivisee' label and we're done */
7937 genDefineTempLabel(onNegDivisee);
7939 /* Generate "sar reg, log2(value)" */
7941 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
7943 /* The register is now trashed */
7945 regTracker.rsTrackRegTrash(reg);
7947 /* The result is the same as the operand */
7949 reg = op1->gtRegNum;
7952 genCodeForTree_DONE(tree, reg);
7956 genCodeForGeneralDivide(tree, destReg, bestReg);
7959 /*****************************************************************************
7961 * Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
7962 * (if op2 is not a power of 2 constant).
7965 void CodeGen::genCodeForGeneralDivide(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
7967 assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
7968 tree->OperGet() == GT_DIV);
7970 GenTree* op1 = tree->gtOp.gtOp1;
7971 GenTree* op2 = tree->gtOp.gtOp2;
7972 const var_types treeType = tree->TypeGet();
7973 regMaskTP needReg = destReg;
7979 #if USE_HELPERS_FOR_INT_DIV
7980 noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
7983 #if defined(_TARGET_XARCH_)
7985 /* Which operand are we supposed to evaluate first? */
7987 if (tree->gtFlags & GTF_REVERSE_OPS)
7989 /* We'll evaluate 'op2' first */
7992 destReg &= ~op1->gtRsvdRegs;
7994 /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
7995 if (op1->gtOper == GT_LCL_VAR)
7997 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
7998 noway_assert(varNum < compiler->lvaCount);
7999 LclVarDsc* varDsc = compiler->lvaTable + varNum;
8000 if (varDsc->lvRegister)
8002 destReg &= ~genRegMask(varDsc->lvRegNum);
8008 /* We'll evaluate 'op1' first */
8013 if (RBM_EAX & op2->gtRsvdRegs)
8014 op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8016 op1Mask = RBM_EAX; // EAX would be ideal
8018 /* Generate the dividend into EAX and hold on to it. freeOnly=true */
8020 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8023 /* We want to avoid using EAX or EDX for the second operand */
8025 destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);
8027 /* Make the second operand addressable */
8028 op2 = genCodeForCommaTree(op2);
8030 /* Special case: if op2 is a local var we are done */
8032 if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
8035 addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
8041 genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8043 noway_assert(op2->InReg());
8044 addrReg = genRegMask(op2->gtRegNum);
8047 /* Make sure we have the dividend in EAX */
8051 /* We've previously computed op1 into EAX */
8053 genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
8057 /* Compute op1 into EAX and hold on to it */
8059 genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8062 noway_assert(op1->InReg());
8063 noway_assert(op1->gtRegNum == REG_EAX);
8065 /* We can now safely (we think) grab EDX */
8067 regSet.rsGrabReg(RBM_EDX);
8068 regSet.rsLockReg(RBM_EDX);
8070 /* Convert the integer in EAX into a un/signed long in EDX:EAX */
8072 const genTreeOps oper = tree->OperGet();
8074 if (oper == GT_UMOD || oper == GT_UDIV)
8075 instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
8079 /* Make sure the divisor is still addressable */
8081 addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
8083 /* Perform the division */
8085 if (oper == GT_UMOD || oper == GT_UDIV)
8086 inst_TT(INS_UNSIGNED_DIVIDE, op2);
8088 inst_TT(INS_SIGNED_DIVIDE, op2);
8090 /* Free up anything tied up by the divisor's address */
8092 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
8094 /* Unlock and free EDX */
8096 regSet.rsUnlockReg(RBM_EDX);
8098 /* Free up op1 (which is in EAX) as well */
8102 /* Both EAX and EDX are now trashed */
8104 regTracker.rsTrackRegTrash(REG_EAX);
8105 regTracker.rsTrackRegTrash(REG_EDX);
8107 /* Figure out which register the result is in */
8109 reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
8111 /* Don't forget to mark the first operand as using EAX and EDX */
8113 op1->gtRegNum = reg;
8115 genCodeForTree_DONE(tree, reg);
8117 #elif defined(_TARGET_ARM_)
8119 /* Which operand are we supposed to evaluate first? */
8121 if (tree->gtFlags & GTF_REVERSE_OPS)
8123 /* We'll evaluate 'op2' first */
8126 destReg &= ~op1->gtRsvdRegs;
8128 /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
8129 if (op1->gtOper == GT_LCL_VAR)
8131 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
8132 noway_assert(varNum < compiler->lvaCount);
8133 LclVarDsc* varDsc = compiler->lvaTable + varNum;
8134 if (varDsc->lvRegister)
8136 destReg &= ~genRegMask(varDsc->lvRegNum);
8142 /* We'll evaluate 'op1' first */
8145 regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
8147 /* Generate the dividend into a register and hold on to it. */
8149 genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8152 /* Evaluate the second operand into a register and hold onto it. */
8154 genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
8156 noway_assert(op2->InReg());
8157 addrReg = genRegMask(op2->gtRegNum);
8161 // Recover op1 if spilled
8162 genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
8166 /* Compute op1 into any register and hold on to it */
8167 genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8169 noway_assert(op1->InReg());
8171 reg = regSet.rsPickReg(needReg, bestReg);
8173 // Perform the divison
8175 const genTreeOps oper = tree->OperGet();
8177 if (oper == GT_UMOD || oper == GT_UDIV)
8182 getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
8184 if (oper == GT_UMOD || oper == GT_MOD)
8186 getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
8187 getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
8189 /* Free up op1 and op2 */
8193 genCodeForTree_DONE(tree, reg);
8196 #error "Unknown _TARGET_"
8200 /*****************************************************************************
8202 * Generate code for an assignment shift (x <op>= ). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
8205 void CodeGen::genCodeForAsgShift(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
8207 assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
8209 const genTreeOps oper = tree->OperGet();
8210 GenTree* op1 = tree->gtOp.gtOp1;
8211 GenTree* op2 = tree->gtOp.gtOp2;
8212 const var_types treeType = tree->TypeGet();
8213 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8214 regMaskTP needReg = destReg;
8222 ins = INS_SHIFT_LEFT_LOGICAL;
8225 ins = INS_SHIFT_RIGHT_ARITHM;
8228 ins = INS_SHIFT_RIGHT_LOGICAL;
8234 noway_assert(!varTypeIsGC(treeType));
8237 /* Shifts by a constant amount are easier */
8239 if (op2->IsCnsIntOrI())
8241 /* Make the target addressable */
8243 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
8245 /* Are we shifting a register left by 1 bit? */
8247 if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && op1->InReg())
8249 /* The target lives in a register */
8251 reg = op1->gtRegNum;
8253 /* "add reg, reg" is cheaper than "shl reg, 1" */
8255 inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
8259 #if CPU_LOAD_STORE_ARCH
8262 regSet.rsLockUsedReg(addrReg);
8264 // Load op1 into a reg
8266 reg = regSet.rsPickReg(RBM_ALLINT);
8268 inst_RV_TT(INS_mov, reg, op1);
8272 inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
8273 regTracker.rsTrackRegTrash(reg);
8275 /* Store the (sign/zero extended) result back to the stack location of the variable */
8277 inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
8279 regSet.rsUnlockUsedReg(addrReg);
8282 #endif // CPU_LOAD_STORE_ARCH
8284 /* Shift by the constant value */
8286 inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
8290 /* If the target is a register, it has a new value */
8293 regTracker.rsTrackRegTrash(op1->gtRegNum);
8295 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8297 /* The zero flag is now equal to the target value */
8298 /* X86: But only if the shift count is != 0 */
8300 if (op2->gtIntCon.gtIconVal != 0)
8302 if (tree->gtSetFlags())
8304 if (op1->gtOper == GT_LCL_VAR)
8306 genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
8308 else if (op1->gtOper == GT_REG_VAR)
8310 genFlagsEqualToReg(tree, op1->gtRegNum);
8316 // It is possible for the shift count to equal 0 with valid
8317 // IL, and not be optimized away, in the case where the node
8318 // is of a small type. The sequence of instructions looks like
8319 // ldsfld, shr, stsfld and executed on a char field. This will
8320 // never happen with code produced by our compilers, because the
8321 // compilers will insert a conv.u2 before the stsfld (which will
8322 // lead us down a different codepath in the JIT and optimize away
8323 // the shift by zero). This case is not worth optimizing and we
8324 // will just make sure to generate correct code for it.
8326 genFlagsEqualToNone();
8331 regMaskTP op2Regs = RBM_NONE;
8332 if (REG_SHIFT != REG_NA)
8333 op2Regs = RBM_SHIFT;
8337 if (tree->gtFlags & GTF_REVERSE_OPS)
8339 tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
8340 genCodeForTree(op2, tempRegs);
8341 regSet.rsMarkRegUsed(op2);
8343 tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
8344 addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8346 genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
8350 /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
8351 regMaskTP excludeMask = op2->gtRsvdRegs;
8352 if (REG_SHIFT != REG_NA)
8353 excludeMask |= RBM_SHIFT;
8355 tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
8356 addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
8358 /* Load the shift count into the necessary register */
8359 genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
8362 /* Make sure the address registers are still here */
8363 addrReg = genKeepAddressable(op1, addrReg, op2Regs);
8365 #ifdef _TARGET_XARCH_
8366 /* Perform the shift */
8367 inst_TT_CL(ins, op1);
8369 /* Perform the shift */
8370 noway_assert(op2->InReg());
8371 op2Regs = genRegMask(op2->gtRegNum);
8373 regSet.rsLockUsedReg(addrReg | op2Regs);
8374 inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
8375 regSet.rsUnlockUsedReg(addrReg | op2Regs);
8377 /* Free the address registers */
8378 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
8380 /* If the value is in a register, it's now trash */
8383 regTracker.rsTrackRegTrash(op1->gtRegNum);
8385 /* Release the op2 [RBM_SHIFT] operand */
8390 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
8393 /*****************************************************************************
8395 * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
8398 void CodeGen::genCodeForShift(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
8400 assert(tree->OperIsShift());
8402 const genTreeOps oper = tree->OperGet();
8403 GenTree* op1 = tree->gtOp.gtOp1;
8404 GenTree* op2 = tree->gtOp.gtOp2;
8405 const var_types treeType = tree->TypeGet();
8406 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
8407 regMaskTP needReg = destReg;
8414 ins = INS_SHIFT_LEFT_LOGICAL;
8417 ins = INS_SHIFT_RIGHT_ARITHM;
8420 ins = INS_SHIFT_RIGHT_LOGICAL;
8426 /* Is the shift count constant? */
8428 if (op2->IsIntCnsFitsInI32())
8430 // TODO: Check to see if we could generate a LEA instead!
8432 /* Compute the left operand into any free register */
8434 genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
8436 noway_assert(op1->InReg());
8437 reg = op1->gtRegNum;
8439 /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
8441 // On ARM, until proven otherwise by performance numbers, just do the shift.
8442 // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
8443 // It's smaller than two "add reg, reg".
8445 CLANG_FORMAT_COMMENT_ANCHOR;
8447 #ifndef _TARGET_ARM_
8450 emitAttr size = emitActualTypeSize(treeType);
8451 if (op2->gtIntConCommon.IconValue() == 1)
8453 /* "add reg, reg" is smaller and faster than "shl reg, 1" */
8454 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8456 else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
8458 /* two "add reg, reg" instructions are faster than "shl reg, 2" */
8459 inst_RV_RV(INS_add, reg, reg, treeType);
8460 inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
8463 goto DO_SHIFT_BY_CNS;
8466 #endif // _TARGET_ARM_
8468 #ifndef _TARGET_ARM_
8470 #endif // _TARGET_ARM_
8471 // If we are shifting 'reg' by zero bits and do not need the flags to be set
8472 // then we can just skip emitting the instruction as 'reg' is already correct.
8474 if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
8476 /* Generate the appropriate shift instruction */
8477 inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
8483 /* Calculate a useful register mask for computing op1 */
8484 needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
8485 regMaskTP op2RegMask;
8486 #ifdef _TARGET_XARCH_
8487 op2RegMask = RBM_ECX;
8489 op2RegMask = RBM_NONE;
8491 needReg = regSet.rsMustExclude(needReg, op2RegMask);
8495 /* Which operand are we supposed to evaluate first? */
8496 if (tree->gtFlags & GTF_REVERSE_OPS)
8498 /* Load the shift count [into ECX on XARCH] */
8499 tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
8500 genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8502 /* We must not target the register that is holding op2 */
8503 needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
8505 /* Now evaluate 'op1' into a free register */
8506 genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8508 /* Recover op2 into ECX */
8509 genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
8513 /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
8514 tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
8515 genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
8517 /* Load the shift count [into ECX on XARCH] */
8518 genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
8521 noway_assert(op2->InReg());
8522 #ifdef _TARGET_XARCH_
8523 noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
8525 // Check for the case of op1 being spilled during the evaluation of op2
8526 if (op1->gtFlags & GTF_SPILLED)
8528 // The register has been spilled -- reload it to any register except ECX
8529 regSet.rsLockUsedReg(op2RegMask);
8530 regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
8531 regSet.rsUnlockUsedReg(op2RegMask);
8534 noway_assert(op1->InReg());
8535 reg = op1->gtRegNum;
8538 /* Perform the shift */
8539 getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
8541 /* Perform the shift */
8542 inst_RV_CL(ins, reg);
8547 noway_assert(op1->InReg());
8548 noway_assert(reg == op1->gtRegNum);
8550 /* The register is now trashed */
8552 regTracker.rsTrackRegTrash(reg);
8554 genCodeForTree_DONE(tree, reg);
8557 /*****************************************************************************
8559 * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
8560 * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
8563 void CodeGen::genCodeForRelop(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
8565 assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
8566 tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
8568 const genTreeOps oper = tree->OperGet();
8569 GenTree* op1 = tree->gtOp.gtOp1;
8570 const var_types treeType = tree->TypeGet();
8571 regMaskTP needReg = destReg;
8574 // Longs and float comparisons are converted to "?:"
8575 noway_assert(!compiler->fgMorphRelopToQmark(op1));
8577 // Check if we can use the currently set flags. Else set them
8579 emitJumpKind jumpKind = genCondSetFlags(tree);
8581 // Grab a register to materialize the bool value into
8583 bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
8585 // Check that the predictor did the right job
8586 noway_assert(bestReg);
8588 // If needReg is in bestReg then use it
8589 if (needReg & bestReg)
8590 reg = regSet.rsGrabReg(needReg & bestReg);
8592 reg = regSet.rsGrabReg(bestReg);
8594 #if defined(_TARGET_ARM_)
8597 // jump-if-true L_true
8607 L_true = genCreateTempLabel();
8608 L_end = genCreateTempLabel();
8610 inst_JMP(jumpKind, L_true);
8611 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
8612 inst_JMP(EJ_jmp, L_end);
8613 genDefineTempLabel(L_true);
8614 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
8615 genDefineTempLabel(L_end);
8617 regTracker.rsTrackRegTrash(reg);
8619 #elif defined(_TARGET_XARCH_)
8620 regMaskTP regs = genRegMask(reg);
8621 noway_assert(regs & RBM_BYTE_REGS);
8623 // Set (lower byte of) reg according to the flags
8625 /* Look for the special case where just want to transfer the carry bit */
8627 if (jumpKind == EJ_jb)
8629 inst_RV_RV(INS_SUBC, reg, reg);
8630 inst_RV(INS_NEG, reg, TYP_INT);
8631 regTracker.rsTrackRegTrash(reg);
8633 else if (jumpKind == EJ_jae)
8635 inst_RV_RV(INS_SUBC, reg, reg);
8636 genIncRegBy(reg, 1, tree, TYP_INT);
8637 regTracker.rsTrackRegTrash(reg);
8641 inst_SET(jumpKind, reg);
8643 regTracker.rsTrackRegTrash(reg);
8645 if (treeType == TYP_INT)
8647 // Set the higher bytes to 0
8648 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
8652 noway_assert(treeType == TYP_BYTE);
8657 #endif // _TARGET_XXX
8659 genCodeForTree_DONE(tree, reg);
8662 //------------------------------------------------------------------------
8663 // genCodeForCopyObj: Generate code for a CopyObj node
8666 // tree - The CopyObj node we are going to generate code for.
8667 // destReg - The register mask for register(s), if any, that will be defined.
8672 void CodeGen::genCodeForCopyObj(GenTree* tree, regMaskTP destReg)
8674 // If the value class doesn't have any fields that are GC refs or
8675 // the target isn't on the GC-heap, we can merge it with CPBLK.
8676 // GC fields cannot be copied directly, instead we will
8677 // need to use a jit-helper for that.
8678 assert(tree->gtOper == GT_ASG);
8679 assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
8681 GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
8682 assert(cpObjOp->HasGCPtr());
8685 if (cpObjOp->IsVolatile())
8687 // Emit a memory barrier instruction before the CopyBlk
8688 instGen_MemoryBarrier();
8691 assert(tree->gtOp.gtOp2->OperIsIndir());
8692 GenTree* srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
8693 GenTree* dstObj = cpObjOp->Addr();
8695 noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
8698 CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
8699 size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
8701 // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
8702 // The EE currently does not allow this. Let's assert it just to be safe.
8703 noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
8706 size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
8707 unsigned slots = cpObjOp->gtSlots;
8708 BYTE* gcPtrs = cpObjOp->gtGcPtrs;
8709 unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
8710 assert(blkSize == cpObjOp->gtBlkSize);
8713 GenTree* treeSecond;
8714 regNumber regFirst, regSecond;
8716 // Check what order the object-ptrs have to be evaluated in ?
8718 if (tree->gtFlags & GTF_REVERSE_OPS)
8721 treeSecond = dstObj;
8722 #if CPU_USES_BLOCK_MOVE
8724 regSecond = REG_EDI;
8726 regFirst = REG_ARG_1;
8727 regSecond = REG_ARG_0;
8733 treeSecond = srcObj;
8734 #if CPU_USES_BLOCK_MOVE
8736 regSecond = REG_ESI;
8738 regFirst = REG_ARG_0;
8739 regSecond = REG_ARG_1;
8743 bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
8744 bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
8745 emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
8746 emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
8748 #if CPU_USES_BLOCK_MOVE
8749 // Materialize the trees in the order desired
8751 genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8752 genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8753 genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
8755 // Grab ECX because it will be trashed by the helper
8757 regSet.rsGrabReg(RBM_ECX);
8759 while (blkSize >= TARGET_POINTER_SIZE)
8761 if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
8763 // Note that we can use movsd even if it is a GC pointer being transfered
8764 // because the value is not cached anywhere. If we did this in two moves,
8765 // we would have to make certain we passed the appropriate GC info on to
8771 // This helper will act like a MOVSD
8772 // -- inputs EDI and ESI are byrefs
8773 // -- including incrementing of ESI and EDI by 4
8774 // -- helper will trash ECX
8776 regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
8777 regSet.rsLockUsedReg(argRegs);
8778 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
8780 EA_PTRSIZE); // retSize
8781 regSet.rsUnlockUsedReg(argRegs);
8784 blkSize -= TARGET_POINTER_SIZE;
8787 // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
8789 regTracker.rsTrackRegTrash(REG_EDI);
8790 regTracker.rsTrackRegTrash(REG_ESI);
8791 regTracker.rsTrackRegTrash(REG_ECX);
8793 gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
8795 /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
8796 it is a emitNoGChelper. However, we have to let the emitter know that
8797 the GC liveness has changed. We do this by creating a new label.
8800 noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
8802 genDefineTempLabel(&dummyBB);
8804 #else // !CPU_USES_BLOCK_MOVE
8806 #ifndef _TARGET_ARM_
8807 // Currently only the ARM implementation is provided
8808 #error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
8811 // Materialize the trees in the order desired
8817 if ((gcPtrCount > 0) && !dstIsOnStack)
8819 genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8820 genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
8821 genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
8823 /* The helper is a Asm-routine that will trash R2,R3 and LR */
8825 /* Spill any callee-saved registers which are being used */
8826 regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
8830 regSet.rsSpillRegs(spillRegs);
8834 // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
8835 // We will also use it as the temp register for our load/store sequences
8837 assert(REG_R2 == REG_TMP_1);
8838 regTemp = regSet.rsGrabReg(RBM_R2);
8843 genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
8844 genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
8845 genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
8847 // Grab any temp register to use for our load/store sequences
8849 regTemp = regSet.rsGrabReg(RBM_ALLINT);
8852 assert(dstObj->InReg());
8853 assert(srcObj->InReg());
8855 regDst = dstObj->gtRegNum;
8856 regSrc = srcObj->gtRegNum;
8858 assert(regDst != regTemp);
8859 assert(regSrc != regTemp);
8861 instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
8862 instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
8865 while (blkSize >= TARGET_POINTER_SIZE)
8867 CorInfoGCType gcType;
8868 CorInfoGCType gcTypeNext = TYPE_GC_NONE;
8869 var_types type = TYP_I_IMPL;
8871 gcType = (CorInfoGCType)(*gcPtrs++);
8872 if (blkSize > TARGET_POINTER_SIZE)
8873 gcTypeNext = (CorInfoGCType)(*gcPtrs);
8875 if (gcType == TYPE_GC_REF)
8877 else if (gcType == TYPE_GC_BYREF)
8882 assert(regDst == REG_ARG_0);
8883 assert(regSrc == REG_ARG_1);
8884 assert(regTemp == REG_R2);
8887 blkSize -= TARGET_POINTER_SIZE;
8889 emitAttr opSize = emitTypeSize(type);
8891 if (!helperUsed || (gcType == TYPE_GC_NONE))
8893 getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
8894 getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
8895 offset += TARGET_POINTER_SIZE;
8897 if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
8899 getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
8900 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
8906 assert(offset == 0);
8908 // The helper will act like this:
8909 // -- inputs R0 and R1 are byrefs
8910 // -- helper will perform copy from *R1 into *R0
8911 // -- helper will perform post increment of R0 and R1 by 4
8912 // -- helper will trash R2
8913 // -- helper will trash R3
8914 // -- calling the helper implicitly trashes LR
8917 regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
8918 regSet.rsLockUsedReg(argRegs);
8919 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
8921 EA_PTRSIZE); // retSize
8923 regSet.rsUnlockUsedReg(argRegs);
8924 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
8928 regTracker.rsTrackRegTrash(regDst);
8929 regTracker.rsTrackRegTrash(regSrc);
8930 regTracker.rsTrackRegTrash(regTemp);
8932 gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
8934 /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
8935 it is a emitNoGChelper. However, we have to let the emitter know that
8936 the GC liveness has changed. We do this by creating a new label.
8939 noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
8941 genDefineTempLabel(&dummyBB);
8943 #endif // !CPU_USES_BLOCK_MOVE
8945 assert(blkSize == 0);
8947 genReleaseReg(dstObj);
8948 genReleaseReg(srcObj);
8950 genCodeForTree_DONE(tree, REG_NA);
8953 if (cpObjOp->IsVolatile())
8955 // Emit a memory barrier instruction after the CopyBlk
8956 instGen_MemoryBarrier();
8961 //------------------------------------------------------------------------
8962 // genCodeForBlkOp: Generate code for a block copy or init operation
8965 //    tree - The block assignment
8966 //    destReg - The expected destination register
//
// Notes:
//    Legacy-backend codegen for InitBlk/CopyBlk-style block assignments.
//    Copies of GT_OBJ trees containing GC pointers are delegated to
//    genCodeForCopyObj (which must update GC info per slot). On targets with
//    CPU_USES_BLOCK_MOVE (x86-style) this emits rep movs/stos sequences with
//    an optional SSE2 movq fast path; otherwise (ARM) it emits unrolled or
//    looped ldr/str sequences, falling back to the CORINFO_HELP_MEMCPY /
//    CORINFO_HELP_MEMSET helpers for variable-sized operations.
//
8968 void CodeGen::genCodeForBlkOp(GenTree* tree, regMaskTP destReg)
8970 genTreeOps oper = tree->OperGet();
8971 GenTree* dest = tree->gtOp.gtOp1;
8972 GenTree* src = tree->gtGetOp2();
8973 regMaskTP needReg = destReg;
8974 regMaskTP regs = regSet.rsMaskUsed;
8976 regMaskTP regsPtr[3];
8978 GenTree* srcPtrOrVal;
8980 noway_assert(tree->OperIsBlkOp());
8982 bool isCopyBlk = false;
8983 bool isInitBlk = false;
8984 bool hasGCpointer = false;
8985 unsigned blockSize = dest->AsBlk()->gtBlkSize;
8986 GenTree* sizeNode = nullptr;
8987 bool sizeIsConst = true;
// A GT_DYN_BLK destination carries its size as a separate tree; in that case
// the block size is not a compile-time constant.
8988 if (dest->gtOper == GT_DYN_BLK)
8990 sizeNode = dest->AsDynBlk()->gtDynamicSize;
8991 sizeIsConst = false;
8994 if (tree->OperIsCopyBlkOp())
8997 if (dest->gtOper == GT_OBJ)
8999 if (dest->AsObj()->gtGcPtrCount != 0)
// Destination contains GC pointers: hand the whole operation off so the
// per-slot GC tracking is done correctly.
9001 genCodeForCopyObj(tree, destReg);
9011 // Ensure that we have an address in the CopyBlk case.
9014 // TODO-1stClassStructs: Allow a lclVar here.
9015 assert(src->OperIsIndir());
// For CopyBlk the second operand is the source address; for InitBlk it is
// the init value (see the isInitBlk uses below).
9016 srcPtrOrVal = src->AsIndir()->Addr();
// A volatile block operation needs a memory barrier before the store(s).
9024 if (dest->AsBlk()->IsVolatile())
9026 // Emit a memory barrier instruction before the InitBlk/CopyBlk
9027 instGen_MemoryBarrier();
9031 destPtr = dest->AsBlk()->Addr();
9032 noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
9034 (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
9035 (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
9037 noway_assert(destPtr && srcPtrOrVal);
9039 #if CPU_USES_BLOCK_MOVE
// x86-style path using the string instructions: the value/source operand is
// forced into EAX (stos init value) or ESI (movs source); the destination
// always goes to EDI, and a variable count goes to ECX (see below).
9040 regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
9042 /* Some special code for block moves/inits for constant sizes */
9045 // Is this a fixed size COPYBLK?
9046 // or a fixed size INITBLK with a constant init value?
9048 if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
9050 size_t length = blockSize;
9052 instruction ins_P, ins_PR, ins_B;
9057 ins_PR = INS_r_stosp;
9060 /* Properly extend the init constant from a U1 to a U4 */
9061 initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
9063 /* If it is a non-zero value we have to replicate */
9064 /* the byte value four times to form the DWORD */
9065 /* Then we change this new value into the tree-node */
9069 initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9070 #ifdef _TARGET_64BIT_
// On 64-bit, replicate again into the upper 32 bits so a pointer-sized
// store writes the pattern to all 8 bytes.
9073 initVal = initVal | (initVal << 32);
9074 srcPtrOrVal->gtType = TYP_LONG;
9078 srcPtrOrVal->gtType = TYP_INT;
9080 #endif // _TARGET_64BIT_
9082 srcPtrOrVal->gtIntCon.gtIconVal = initVal;
9087 ins_PR = INS_r_movsp;
9091 // Determine if we will be using SSE2
// Heuristic byte-length window [movqLenMin, movqLenMax] for using 8-byte movq
// copies; widened or narrowed below based on block weight and opt level.
9092 unsigned movqLenMin = 8;
9093 unsigned movqLenMax = 24;
9095 bool bWillUseSSE2 = false;
9096 bool bWillUseOnlySSE2 = false;
9097 bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
9099 #ifdef _TARGET_64BIT_
9101 // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
9102 // there is no point in wasting space on the bigger instructions
9104 #else // !_TARGET_64BIT_
9106 if (compiler->opts.compCanUseSSE2)
9108 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
9110 /* Adjust for BB weight */
9111 if (curBBweight == BB_ZERO_WEIGHT)
9113 // Don't bother with this optimization in
9114 // rarely run blocks
9115 movqLenMax = movqLenMin = 0;
9117 else if (curBBweight < BB_UNITY_WEIGHT)
9119 // Be less aggressive when we are inside a conditional
9122 else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
9124 // Be more aggressive when we are inside a loop
9128 if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
9130 // Be more aggressive when optimizing for speed
9131 // InitBlk uses fewer instructions
9135 if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
9137 bWillUseSSE2 = true;
9139 if ((length % 8) == 0)
// Whole block is a multiple of 8 bytes, so movq can do everything and the
// string registers (EDI, and ESI/EAX) need not be consumed.
9141 bWillUseOnlySSE2 = true;
9142 if (isInitBlk && (initVal == 0))
// Zero-init comes from xorps below; no need to materialize the constant.
9144 bNeedEvaluateCnst = false;
9145 noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
9151 #endif // !_TARGET_64BIT_
9153 const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
9154 /* Evaluate dest and src/val */
// Honor GTF_REVERSE_OPS to evaluate operands in the IL-specified order; both
// orders end with dest in EDI and src/val recovered into 'regs'.
9156 if (tree->gtFlags & GTF_REVERSE_OPS)
9158 if (bNeedEvaluateCnst)
9160 genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9162 genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9163 if (bNeedEvaluateCnst)
9165 genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
9170 genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
9171 if (bNeedEvaluateCnst)
9173 genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
9175 genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
9178 bool bTrashedESI = false;
9179 bool bTrashedEDI = false;
// SSE2 path: broadcast the init byte pattern into XMM0 (or zero it with
// xorps), then move 8 bytes at a time at increasing displacements.
9184 regNumber xmmReg = REG_XMM0;
9190 getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
9191 getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
9195 getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
9199 JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
9200 length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
9206 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
9210 getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
9211 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
// A non-SSE2 tail remains: advance ESI/EDI past the bytes already handled so
// the string instructions continue at the right offset (byref-aware adds).
9219 noway_assert(bNeedEvaluateCnst);
9220 noway_assert(!bWillUseOnlySSE2);
9224 inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
9228 inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
9231 if (length >= REGSIZE_BYTES)
9234 length -= REGSIZE_BYTES;
9238 else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
9240 /* For small code, we can only use ins_DR to generate fast
9241 and small code. We also can't use "rep movsb" because
9242 we may not atomically reading and writing the DWORD */
9244 noway_assert(bNeedEvaluateCnst);
9248 else if (length <= 4 * REGSIZE_BYTES)
// Short block: unroll into individual pointer-sized stos/movs instructions.
9250 noway_assert(bNeedEvaluateCnst);
9252 while (length >= REGSIZE_BYTES)
9255 length -= REGSIZE_BYTES;
// Longer block: "rep stosp/movsp" with the pointer-sized word count in ECX.
9265 noway_assert(bNeedEvaluateCnst);
9267 /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
9268 genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
9270 length &= (REGSIZE_BYTES - 1);
9274 regTracker.rsTrackRegTrash(REG_ECX);
9281 /* Now take care of the remainder */
9282 CLANG_FORMAT_COMMENT_ANCHOR;
9284 #ifdef _TARGET_64BIT_
// 64-bit remainder: fewer than 8 bytes are left; a 4-byte stosd/movsd
// handles the dword part before the sub-dword tail.
9287 noway_assert(bNeedEvaluateCnst);
9288 noway_assert(length < 8);
9290 instGen((isInitBlk) ? INS_stosd : INS_movsd);
9298 #endif // _TARGET_64BIT_
9302 noway_assert(bNeedEvaluateCnst);
// Record which of EDI/ESI were modified so the register tracker stays
// accurate after the string/SSE2 sequences above.
9314 noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
9316 regTracker.rsTrackRegTrash(REG_EDI);
9318 regTracker.rsTrackRegTrash(REG_ESI);
9319 // else No need to trash EAX as it wasnt destroyed by the "rep stos"
9321 genReleaseReg(destPtr);
9322 if (bNeedEvaluateCnst)
9323 genReleaseReg(srcPtrOrVal);
9328 // This a variable-sized COPYBLK/INITBLK,
9329 // or a fixed size INITBLK with a variable init value,
9332 // What order should the Dest, Val/Src, and Size be calculated
// fgOrderBlockOps decides the evaluation order and fills opsPtr/regsPtr with
// the {dest, src/val, size} trees and their target register masks
// (EDI / EAX-or-ESI / ECX for this x86 string-instruction path).
9334 compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
9336 noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
9337 genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
9338 genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
9339 if (opsPtr[2] != nullptr)
9341 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
9343 genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9344 genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9346 noway_assert((destPtr->InReg()) && // Dest
9347 (destPtr->gtRegNum == REG_EDI));
9349 noway_assert((srcPtrOrVal->InReg()) && // Val/Src
9350 (genRegMask(srcPtrOrVal->gtRegNum) == regs));
// Constant size with a variable init value: load the byte count directly.
9354 inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
9358 noway_assert((sizeNode->InReg()) && // Size
9359 (sizeNode->gtRegNum == REG_ECX));
// Byte-granular rep stosb / rep movsb; ECX holds the byte count.
9363 instGen(INS_r_stosb);
9365 instGen(INS_r_movsb);
9367 regTracker.rsTrackRegTrash(REG_EDI);
9368 regTracker.rsTrackRegTrash(REG_ECX);
9371 regTracker.rsTrackRegTrash(REG_ESI);
9372 // else No need to trash EAX as it wasnt destroyed by the "rep stos"
9374 genReleaseReg(opsPtr[0]);
9375 genReleaseReg(opsPtr[1]);
9376 if (opsPtr[2] != nullptr)
9378 genReleaseReg(opsPtr[2]);
9382 #else // !CPU_USES_BLOCK_MOVE
9384 #ifndef _TARGET_ARM_
9385 // Currently only the ARM implementation is provided
9386 #error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
9389 // Is this a fixed size COPYBLK?
9390 // or a fixed size INITBLK with a constant init value?
9392 if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
9394 GenTree* dstOp = destPtr;
9395 GenTree* srcOp = srcPtrOrVal;
9396 unsigned length = blockSize;
9397 unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
9398 unsigned initVal = 0;
9399 bool useLoop = false;
9403 /* Properly extend the init constant from a U1 to a U4 */
9404 initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
9406 /* If it is a non-zero value we have to replicate */
9407 /* the byte value four times to form the DWORD */
9408 /* Then we store this new value into the tree-node */
9412 initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
9413 srcPtrOrVal->gtIntCon.gtIconVal = initVal;
9417 // Will we be using a loop to implement this INITBLK/COPYBLK?
// Larger blocks (>= 8 stores for copy, >= 16 for init) use a countdown loop;
// smaller ones are fully unrolled below.
9418 if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
9428 /* Evaluate dest and src/val */
9430 if (tree->gtFlags & GTF_REVERSE_OPS)
9432 genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9433 assert(srcOp->InReg());
9435 genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9436 assert(dstOp->InReg());
9437 regDst = dstOp->gtRegNum;
9439 genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
9440 regSrc = srcOp->gtRegNum;
9444 genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9445 assert(dstOp->InReg());
9447 genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
9448 assert(srcOp->InReg());
9449 regSrc = srcOp->gtRegNum;
9451 genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
9452 regDst = dstOp->gtRegNum;
9454 assert(dstOp->InReg());
9455 assert(srcOp->InReg());
9457 regDst = dstOp->gtRegNum;
9458 regSrc = srcOp->gtRegNum;
9459 usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
// Use a byref-attributed add only when the pointer is a GC pointer that is
// not a stack address, so the emitter tracks interior pointers correctly.
9460 bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
9461 emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9466 // Prefer a low register,but avoid one of the ones we've already grabbed
9467 regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9468 usedRegs |= genRegMask(regTemp);
9469 bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
9470 srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
9475 srcType = EA_PTRSIZE;
9478 instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
9479 instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
9483 // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
// Unrolled path: one ldr+str (copy) or one str of the init value (init) per
// pointer-sized slot, at a fixed displacement from the base registers.
9486 for (unsigned i = 0; i < fullStoreCount; i++)
9490 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
9491 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
9492 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9493 regTracker.rsTrackRegTrash(regTemp);
9497 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
9501 finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
9502 length -= finalOffset;
9504 else // We will use a loop to implement this INITBLK/COPYBLK
// Loop path: process two pointer-sized slots per iteration, advancing the
// base registers and counting a loop-index register down to zero.
9506 unsigned pairStoreLoopCount = fullStoreCount / 2;
9508 // We need a second temp register for CopyBlk
9509 regNumber regTemp2 = REG_STK;
9512 // Prefer a low register, but avoid one of the ones we've already grabbed
9514 regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9515 usedRegs |= genRegMask(regTemp2);
9518 // Pick and initialize the loop counter register
9519 regNumber regLoopIndex;
9521 regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
9522 genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
9524 // Create and define the Basic Block for the loop top
9525 BasicBlock* loopTopBlock = genCreateTempLabel();
9526 genDefineTempLabel(loopTopBlock);
// Loop body (copy): load a pair of words, store them, then bump the source
// pointer; the temps must be cleared from GC tracking after use.
9531 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9532 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
9533 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9534 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
9535 getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
9536 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9537 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
9538 regTracker.rsTrackRegTrash(regSrc);
9539 regTracker.rsTrackRegTrash(regTemp);
9540 regTracker.rsTrackRegTrash(regTemp2);
9544 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9545 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
9548 getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
9549 regTracker.rsTrackRegTrash(regDst);
// Decrement the counter with flags set and loop back while it is still > 0.
9550 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
9551 emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
9552 inst_JMP(jmpGTS, loopTopBlock);
9554 regTracker.rsTrackRegIntCns(regLoopIndex, 0);
9556 length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
// Odd pointer-sized slot left over after the pair loop.
9558 if (length & TARGET_POINTER_SIZE)
9562 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
9563 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
9567 getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
9569 finalOffset = TARGET_POINTER_SIZE;
9570 length -= TARGET_POINTER_SIZE;
// Sub-word remainder: at most 3 bytes, handled with a halfword access then a
// byte access.
9578 if (length & sizeof(short))
9580 loadIns = ins_Load(TYP_USHORT); // INS_ldrh
9581 storeIns = ins_Store(TYP_USHORT); // INS_strh
9585 getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
9586 getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
9587 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9588 regTracker.rsTrackRegTrash(regTemp);
9592 getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
9594 length -= sizeof(short);
9595 finalOffset += sizeof(short);
9598 if (length & sizeof(char))
9600 loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
9601 storeIns = ins_Store(TYP_UBYTE); // INS_strb
9605 getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
9606 getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
9607 gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
9608 regTracker.rsTrackRegTrash(regTemp);
9612 getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
9614 length -= sizeof(char);
// Every byte of the block must have been covered by the sequences above.
9616 assert(length == 0);
9618 genReleaseReg(dstOp);
9619 genReleaseReg(srcOp);
9624 // This a variable-sized COPYBLK/INITBLK,
9625 // or a fixed size INITBLK with a variable init value,
// Helper-call path: dest, src (or init value) and size are marshalled into
// the first three argument registers, then the memcpy/memset JIT helper is
// called.
9628 // What order should the Dest, Val/Src, and Size be calculated
9630 regMaskTP regsToLock = RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2;
9632 compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
9634 genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
9635 genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
9636 if (opsPtr[2] != nullptr)
9638 genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
// Constant size: ARG_2 is not produced by an operand tree, so lock it
// directly and drop it from the used-register lock set below.
9642 regSet.rsLockReg(RBM_ARG_2);
9643 regsToLock &= ~RBM_ARG_2;
9645 genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
9646 genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
9648 noway_assert((destPtr->InReg()) && // Dest
9649 (destPtr->gtRegNum == REG_ARG_0));
9651 noway_assert((srcPtrOrVal->InReg()) && // Val/Src
9652 (srcPtrOrVal->gtRegNum == REG_ARG_1));
9656 inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
9660 noway_assert((sizeNode->InReg()) && // Size
9661 (sizeNode->gtRegNum == REG_ARG_2));
9664 regSet.rsLockUsedReg(regsToLock);
9666 genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
9668 : CORINFO_HELP_MEMSET,
// The helper follows the normal calling convention, so all caller-saved
// registers must be considered trashed (contrast with the NOGC byref-assign
// helper used by the CopyObj path).
9671 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
9673 regSet.rsUnlockUsedReg(regsToLock);
9674 genReleaseReg(opsPtr[0]);
9675 genReleaseReg(opsPtr[1]);
9676 if (opsPtr[2] != nullptr)
9678 genReleaseReg(opsPtr[2]);
9682 regSet.rsUnlockReg(RBM_ARG_2);
// A volatile CopyBlk also needs a trailing barrier (the leading one was
// emitted before the operation, above).
9686 if (isCopyBlk && dest->AsBlk()->IsVolatile())
9688 // Emit a memory barrier instruction after the CopyBlk
9689 instGen_MemoryBarrier();
9691 #endif // !CPU_USES_BLOCK_MOVE
9697 #pragma warning(push)
9698 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
9700 void CodeGen::genCodeForTreeSmpOp(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
9702 const genTreeOps oper = tree->OperGet();
9703 const var_types treeType = tree->TypeGet();
9704 GenTree* op1 = tree->gtOp.gtOp1;
9705 GenTree* op2 = tree->gtGetOp2IfPresent();
9706 regNumber reg = DUMMY_INIT(REG_CORRUPT);
9707 regMaskTP regs = regSet.rsMaskUsed;
9708 regMaskTP needReg = destReg;
9709 insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
9714 regMaskTP regsPtr[3];
9717 addrReg = 0xDEADCAFE;
9720 noway_assert(tree->OperKind() & GTK_SMPOP);
9725 if (tree->OperIsBlkOp() && op1->gtOper != GT_LCL_VAR)
9727 genCodeForBlkOp(tree, destReg);
9731 genCodeForTreeSmpOpAsg(tree);
9738 genCodeForAsgShift(tree, destReg, bestReg);
9746 genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
9750 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
9751 #ifdef _TARGET_XARCH_
9752 // Note that the specialCase here occurs when the treeType specifies a byte sized operation
9753 // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
9756 specialCase = false;
9757 if (op1->gtOper == GT_REG_VAR)
9759 /* Get hold of the target register */
9761 reg = op1->gtRegVar.gtRegNum;
9762 if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
9764 regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
9766 inst_RV_RV(INS_mov, byteReg, reg);
9767 regTracker.rsTrackRegTrash(byteReg);
9769 inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
9770 var_types op1Type = op1->TypeGet();
9771 instruction wideningIns = ins_Move_Extend(op1Type, true);
9772 inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
9773 regTracker.rsTrackRegTrash(reg);
9780 inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
9782 #else // not _TARGET_XARCH_
9785 inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
9789 // Fix 388382 ARM JitStress WP7
9790 var_types op1Type = op1->TypeGet();
9791 regNumber reg = regSet.rsPickFreeReg();
9792 inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
9793 regTracker.rsTrackRegTrash(reg);
9794 inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
9795 inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
9799 regTracker.rsTrackRegTrash(op1->gtRegNum);
9800 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
9802 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
9811 genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
9815 genCodeForUnsignedMod(tree, destReg, bestReg);
9819 genCodeForSignedMod(tree, destReg, bestReg);
9823 genCodeForUnsignedDiv(tree, destReg, bestReg);
9827 genCodeForSignedDiv(tree, destReg, bestReg);
9833 genCodeForShift(tree, destReg, bestReg);
9839 /* Generate the operand into some register */
9841 genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
9842 noway_assert(op1->InReg());
9844 reg = op1->gtRegNum;
9846 /* Negate/reverse the value in the register */
9848 inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
9850 /* The register is now trashed */
9852 regTracker.rsTrackRegTrash(reg);
9854 genCodeForTree_DONE(tree, reg);
9858 case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
9860 /* Make sure the operand is addressable */
9862 addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
9864 genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9866 /* Figure out the size of the value being loaded */
9868 size = EA_ATTR(genTypeSize(tree->gtType));
9870 /* Pick a register for the value */
9872 if (needReg == RBM_ALLINT && bestReg == 0)
9874 /* Absent a better suggestion, pick a useless register */
9876 bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
9879 reg = regSet.rsPickReg(needReg, bestReg);
9881 if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
9883 noway_assert(size == EA_PTRSIZE);
9884 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
9885 (int)op1->gtIntCon.gtIconVal);
9889 /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
9891 inst_mov_RV_ST(reg, tree);
9895 if (tree->gtFlags & GTF_IND_VOLATILE)
9897 // Emit a memory barrier instruction after the load
9898 instGen_MemoryBarrier();
9902 /* Note the new contents of the register we used */
9904 regTracker.rsTrackRegTrash(reg);
9907 /* Update the live set of register variables */
9908 if (compiler->opts.varNames)
9909 genUpdateLife(tree);
9912 /* Now we can update the register pointer information */
9914 // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
9915 gcInfo.gcMarkRegPtrVal(reg, treeType);
9917 genCodeForTree_DONE_LIFE(tree, reg);
9922 genCodeForNumericCast(tree, destReg, bestReg);
9927 /* Is this a test of a relational operator? */
9929 if (op1->OperIsCompare())
9931 /* Generate the conditional jump */
9935 genUpdateLife(tree);
9940 compiler->gtDispTree(tree);
9942 NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
9946 genCodeForSwitch(tree);
9950 noway_assert(tree->gtType == TYP_VOID || op1 != 0);
9951 if (op1 == 0) // endfinally
9955 #ifdef _TARGET_XARCH_
9956 /* Return using a pop-jmp sequence. As the "try" block calls
9957 the finally with a jmp, this leaves the x86 call-ret stack
9958 balanced in the normal flow of path. */
9960 noway_assert(isFramePointerRequired());
9961 inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
9962 inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
9963 #elif defined(_TARGET_ARM_)
9964 // Nothing needed for ARM
9971 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
9972 noway_assert(op1->InReg());
9973 noway_assert(op1->gtRegNum == REG_INTRET);
9974 /* The return value has now been computed */
9975 reg = op1->gtRegNum;
9981 genCodeForTree_DONE(tree, reg);
9986 // TODO: this should be done AFTER we called exit mon so that
9987 // we are sure that we don't have to keep 'this' alive
9989 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
9991 /* either it's an "empty" statement or the return statement
9992 of a synchronized method
9995 genPInvokeMethodEpilog();
9998 /* Is there a return value and/or an exit statement? */
10002 if (op1->gtType == TYP_VOID)
10004 // We're returning nothing, just generate the block (shared epilog calls).
10005 genCodeForTree(op1, 0);
10007 #ifdef _TARGET_ARM_
10008 else if (op1->gtType == TYP_STRUCT)
10010 if (op1->gtOper == GT_CALL)
10012 // We have a return call() because we failed to tail call.
10013 // In any case, just generate the call and be done.
10014 assert(compiler->IsHfa(op1));
10015 genCodeForCall(op1->AsCall(), true);
10016 genMarkTreeInReg(op1, REG_FLOATRET);
10020 assert(op1->gtOper == GT_LCL_VAR);
10021 assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
10022 genLoadIntoFltRetRegs(op1);
10025 else if (op1->TypeGet() == TYP_FLOAT)
10027 // This can only occur when we are returning a non-HFA struct
10028 // that is composed of a single float field and we performed
10029 // struct promotion and enregistered the float field.
10031 genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
10032 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
10034 #endif // _TARGET_ARM_
10037 // we can now go through this code for compiler->genReturnBB. I've regularized all the code.
10039 // noway_assert(compiler->compCurBB != compiler->genReturnBB);
10041 noway_assert(op1->gtType != TYP_VOID);
10043 /* Generate the return value into the return register */
10045 genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
10047 /* The result must now be in the return register */
10049 noway_assert(op1->InReg());
10050 noway_assert(op1->gtRegNum == REG_INTRET);
10053 /* The return value has now been computed */
10055 reg = op1->gtRegNum;
10057 genCodeForTree_DONE(tree, reg);
10060 #ifdef PROFILING_SUPPORTED
10061 // The profiling hook does not trash registers, so it's safe to call after we emit the code for
10062 // the GT_RETURN tree.
10064 if (compiler->compCurBB == compiler->genReturnBB)
10066 genProfilingLeaveCallback();
10070 if (compiler->opts.compStackCheckOnRet)
10072 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
10073 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
10074 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
10075 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
10077 BasicBlock* esp_check = genCreateTempLabel();
10078 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
10079 inst_JMP(jmpEqual, esp_check);
10080 getEmitter()->emitIns(INS_BREAKPOINT);
10081 genDefineTempLabel(esp_check);
10088 if (tree->gtFlags & GTF_REVERSE_OPS)
10090 if (tree->gtType == TYP_VOID)
10092 genEvalSideEffects(op2);
10093 genUpdateLife(op2);
10094 genEvalSideEffects(op1);
10095 genUpdateLife(tree);
10100 genCodeForTree(op2, needReg);
10101 genUpdateLife(op2);
10103 noway_assert(op2->InReg());
10105 regSet.rsMarkRegUsed(op2);
10107 // Do side effects of op1
10108 genEvalSideEffects(op1);
10110 // Recover op2 if spilled
10111 genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
10113 regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
10115 // set gc info if we need so
10116 gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
10118 genUpdateLife(tree);
10119 genCodeForTree_DONE(tree, op2->gtRegNum);
10125 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
10127 /* Generate side effects of the first operand */
10129 genEvalSideEffects(op1);
10130 genUpdateLife(op1);
10132 /* Is the value of the second operand used? */
10134 if (tree->gtType == TYP_VOID)
10136 /* The right operand produces no result. The morpher is
10137 responsible for resetting the type of GT_COMMA nodes
10138 to TYP_VOID if op2 isn't meant to yield a result. */
10140 genEvalSideEffects(op2);
10141 genUpdateLife(tree);
10145 /* Generate the second operand, i.e. the 'real' value */
10147 genCodeForTree(op2, needReg);
10148 noway_assert(op2->InReg());
10150 /* The result of 'op2' is also the final result */
10152 reg = op2->gtRegNum;
10154 /* Remember whether we set the flags */
10156 tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
10158 genCodeForTree_DONE(tree, reg);
10163 genCodeForTree(op1, needReg);
10164 noway_assert(op1->InReg());
10166 /* The result of 'op1' is also the final result */
10168 reg = op1->gtRegNum;
10170 /* Remember whether we set the flags */
10172 tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
10174 genCodeForTree_DONE(tree, reg);
10179 genCodeForQmark(tree, destReg, bestReg);
10192 /* Generate the operand into some register */
10194 genCodeForTree(op1, needReg);
10196 /* The result is the same as the operand */
10198 reg = op1->gtRegNum;
10200 genCodeForTree_DONE(tree, reg);
10205 switch (tree->gtIntrinsic.gtIntrinsicId)
10207 case CORINFO_INTRINSIC_Round:
10209 noway_assert(tree->gtType == TYP_INT);
10211 #if FEATURE_STACK_FP_X87
10212 genCodeForTreeFlt(op1);
10214 /* Store the FP value into the temp */
10215 TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
10217 FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10218 FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
10219 inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
10221 reg = regSet.rsPickReg(needReg, bestReg);
10222 regTracker.rsTrackRegTrash(reg);
10224 inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
10226 compiler->tmpRlsTemp(temp);
10228 genCodeForTreeFloat(tree, needReg, bestReg);
10235 noway_assert(!"unexpected math intrinsic");
10238 genCodeForTree_DONE(tree, reg);
10243 reg = genLclHeap(op1);
10244 genCodeForTree_DONE(tree, reg);
10253 genCodeForRelop(tree, destReg, bestReg);
10258 genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
10261 #ifdef _TARGET_XARCH_
10264 // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere.
10265 // For reference, op1 is the location. op2 is the addend or the value.
10266 if (op2->OperIsConst())
10268 noway_assert(op2->TypeGet() == TYP_INT);
10269 ssize_t cns = op2->gtIntCon.gtIconVal;
10271 genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
10276 instEmit_RM(INS_inc, op1, op1, 0);
10280 instEmit_RM(INS_dec, op1, op1, 0);
10283 assert((int)cns == cns); // By test above for AMD64.
10285 inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
10288 genReleaseReg(op1);
10292 // non constant addend means it needs to go into a register.
10294 goto LockBinOpCommon;
10297 genFlagsEqualToNone(); // We didn't compute a result into a register.
10298 genUpdateLife(tree); // We didn't compute an operand into anything.
10303 goto LockBinOpCommon;
10306 goto LockBinOpCommon;
10309 // Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2
10310 // is a constant, it needs to be in a register. This should be the output register if
10313 // For reference, gtOp1 is the location. gtOp2 is the addend or the value.
10315 GenTree* location = op1;
10316 GenTree* value = op2;
10318 // Again, a friendly reminder. IL calling convention is left to right.
10319 if (tree->gtFlags & GTF_REVERSE_OPS)
10321 // The atomic operations destroy this argument, so force it into a scratch register
10322 reg = regSet.rsPickFreeReg();
10323 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10325 // Must evaluate location into a register
10326 genCodeForTree(location, needReg, RBM_NONE);
10327 assert(location->InReg());
10328 regSet.rsMarkRegUsed(location);
10329 regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
10330 genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
10331 regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
10333 if (ins != INS_xchg)
10335 // xchg implies the lock prefix, but xadd and add require it.
10338 instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10339 genReleaseReg(value);
10340 regTracker.rsTrackRegTrash(reg);
10341 genReleaseReg(location);
10346 if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
10347 needReg, RegSet::KEEP_REG, &addrReg))
10349 genUpdateLife(location);
10351 reg = regSet.rsPickFreeReg();
10352 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10353 addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
10355 if (ins != INS_xchg)
10357 // xchg implies the lock prefix, but xadd and add require it.
10361 // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10362 // inst_TT_RV(ins, location, reg);
10363 sched_AM(ins, EA_4BYTE, reg, false, location, 0);
10365 genReleaseReg(value);
10366 regTracker.rsTrackRegTrash(reg);
10367 genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
10371 // Must evalute location into a register.
10372 genCodeForTree(location, needReg, RBM_NONE);
10373 assert(location->InReg());
10374 regSet.rsMarkRegUsed(location);
10376 // xadd destroys this argument, so force it into a scratch register
10377 reg = regSet.rsPickFreeReg();
10378 genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
10379 regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
10380 genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
10381 regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
10383 if (ins != INS_xchg)
10385 // xchg implies the lock prefix, but xadd and add require it.
10389 instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
10391 genReleaseReg(value);
10392 regTracker.rsTrackRegTrash(reg);
10393 genReleaseReg(location);
10397 // The flags are equal to the target of the tree (i.e. the result of the add), not to the
10398 // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
10399 // that information to set the flags. Doesn't seem like there is a good reason for that.
10400 // Therefore, trash the flags.
10401 genFlagsEqualToNone();
10403 if (ins == INS_add)
10405 // If the operator was add, then we were called from the GT_LOCKADD
10406 // case. In that case we don't use the result, so we don't need to
10407 // update anything.
10408 genUpdateLife(tree);
10412 genCodeForTree_DONE(tree, reg);
10417 #endif // _TARGET_XARCH_
10419 case GT_ARR_LENGTH:
10421 // Make the corresponding ind(a + c) node, and do codegen for that.
10422 GenTree* addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
10423 compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
10424 tree->SetOper(GT_IND);
10425 tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
10426 assert(tree->TypeGet() == TYP_INT);
10427 tree->gtOp.gtOp1 = addr;
10428 genCodeForTree(tree, destReg, bestReg);
10433 // All GT_OBJ nodes must have been morphed prior to this.
10434 noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
10438 compiler->gtDispTree(tree);
10440 noway_assert(!"unexpected unary/binary operator");
10441 } // end switch (oper)
10446 #pragma warning(pop) // End suppress PREFast warning about overly large function
// genIntegerCast: emit the machine code for an integer-to-integer GT_CAST and
// return the register that ends up holding the result.
//
//  tree    - the cast node; the source operand is tree->gtOp.gtOp1 and the
//            target type is tree->CastToType()
//  needReg - mask of registers the caller would like the result in
//  bestReg - stronger register preference (may be narrowed below to op1's reg)
//
// NOTE(review): op1 is expected to already be addressable here — the caller in
// genCodeForNumericCast calls genMakeAddressable first; confirm for any other
// callers.
10449 regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
10456     GenTree* op1 = tree->gtOp.gtOp1->gtEffectiveVal();
10457     var_types dstType = tree->CastToType();
10458     var_types srcType = op1->TypeGet();
// Widening cast: the extend instruction is chosen from the *source* type,
// since that is what determines sign- vs zero-extension.
10460     if (genTypeSize(srcType) < genTypeSize(dstType))
10464         /* we need the source size */
10466         size = EA_ATTR(genTypeSize(srcType));
10468         noway_assert(size < EA_PTRSIZE);
10470         unsv = varTypeIsUnsigned(srcType);
10471         ins = ins_Move_Extend(srcType, op1->InReg());
10474             Special case: for a cast of byte to char we first
10475             have to expand the byte (w/ sign extension), then
10476             mask off the high bits.
10477             Use 'movsx' followed by 'and'
10479         if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
// Only the signed-byte -> unsigned-16-bit combination should reach this path.
10481             noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
10487         // Narrowing cast, or sign-changing cast
// Narrowing/sign-change: the extend instruction is chosen from the *target*
// type instead, and only the destination-sized bits matter.
10489         noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
10491         size = EA_ATTR(genTypeSize(dstType));
10493         unsv = varTypeIsUnsigned(dstType);
10494         ins = ins_Move_Extend(dstType, op1->InReg());
10497     noway_assert(size < EA_PTRSIZE);
10499     // Set bestReg to the same register a op1 if op1 is a regVar and is available
// Reusing op1's register (when free and compatible with the hint) lets the
// extend become a same-register operation and saves a move.
10502         regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
10503         if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
10505             bestReg = op1RegMask;
10509     /* Is the value sitting in a non-byte-addressable register? */
// x86: only AL/BL/CL/DL can be used as 8-bit operands, so a byte-sized source
// living in (e.g.) ESI/EDI must be handled specially.
10511     if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
10515         // for unsigned values we can AND, so it needs not be a byte register
10517             reg = regSet.rsPickReg(needReg, bestReg);
10523             /* Move the value into a byte register */
10525             reg = regSet.rsGrabReg(RBM_BYTE_REGS);
10528         if (reg != op1->gtRegNum)
10530             /* Move the value into that register */
10532             regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
10533             inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
10535             /* The value has a new home now */
// Retarget op1 so the tracker and later code see the value in its new register.
10537             op1->gtRegNum = reg;
10542         /* Pick a register for the value (general case) */
10544         reg = regSet.rsPickReg(needReg, bestReg);
10546     // if we (might) need to set the flags and the value is in the same register
10547     // and we have an unsigned value then use AND instead of MOVZX
// Rationale: movzx does not set condition flags, while AND does; when the
// caller wants flags (gtSetFlags) and src == dst reg, AND with the mask is
// both the extend and the flag-setting compare.
10548     if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
10550 #ifdef _TARGET_X86_
10551         noway_assert(ins == INS_movzx);
10557     if (ins == INS_AND)
10559         noway_assert(andv == false && unsv);
10561         /* Generate "and reg, MASK */
10563         insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10564         inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
10566         if (tree->gtSetFlags())
10567             genFlagsEqualToReg(tree, reg);
10571 #ifdef _TARGET_XARCH_
10572         noway_assert(ins == INS_movsx || ins == INS_movzx);
10575         /* Generate "movsx/movzx reg, [addr]" */
10577         inst_RV_ST(ins, size, reg, op1);
10579         /* Mask off high bits for cast from byte to char */
// Second half of the byte->char special case above: movsx then strip the
// sign-extended upper bits with AND (which can also provide flags).
10583 #ifdef _TARGET_XARCH_
10584             noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
10586             insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
10587             inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
10589             if (tree->gtSetFlags())
10590                 genFlagsEqualToReg(tree, reg);
// The result register no longer holds whatever the tracker thought it did.
10594     regTracker.rsTrackRegTrash(reg);
// genCodeForNumericCast: generate code for a GT_CAST between numeric types,
// dispatching on the *source* type of the operand. Handles long->int (with
// optional overflow checks on the upper dword), float/double->int (x87 or
// SSE2 path), and int->int casts (overflow-checked inline, otherwise via
// genIntegerCast). The result is reported through genCodeForTree_DONE.
//
//  tree    - the GT_CAST node
//  destReg - mask of registers the caller wants the result in
//  bestReg - preferred register mask (hint only)
10598 void CodeGen::genCodeForNumericCast(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
10600     GenTree* op1 = tree->gtOp.gtOp1;
10601     var_types dstType = tree->CastToType();
10602     var_types baseType = TYP_INT;
10603     regNumber reg = DUMMY_INIT(REG_CORRUPT);
10604     regMaskTP needReg = destReg;
10610      * Constant casts should have been folded earlier
10611      * If not finite don't bother
10612      * We don't do this optimization for debug code/no optimization
// Sanity check: a constant operand should only survive to here when folding
// was legitimately skipped (overflow cast, non-finite double, negative
// double to unsigned, or constant folding disabled).
10616         (op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) || tree->gtOverflow() ||
10617         (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
10618         (op1->gtOper == GT_CNS_DBL && op1->gtDblCon.gtDconVal <= -1.0 && varTypeIsUnsigned(tree->CastToType())) ||
10619         !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
10621     noway_assert(dstType != TYP_VOID);
10623     /* What type are we casting from? */
10625     switch (op1->TypeGet())
10629             /* Special case: the long is generated via the mod of long
10630                with an int. This is really an int and need not be
10631                converted to a reg pair. NOTE: the flag only indicates
10632                that this is a case to TYP_INT, it hasn't actually
10633                verified the second operand of the MOD! */
10635             if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
10638                 /* Verify that the op2 of the mod node is
10639                    1) An integer tree, or
10640                    2) A long constant that is small enough to fit in an integer
10643                 GenTree* modop2 = op1->gtOp.gtOp2;
10644                 if ((genActualType(modop2->gtType) == TYP_INT) ||
10645                     ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
10647                     genCodeForTree(op1, destReg, bestReg);
10649 #ifdef _TARGET_64BIT_
10650                     reg = op1->gtRegNum;
10651 #else // _TARGET_64BIT_
// On 32-bit targets a long lives in a register pair; the int result is the
// low half.
10652                     reg = genRegPairLo(op1->gtRegPair);
10653 #endif //_TARGET_64BIT_
10655                     genCodeForTree_DONE(tree, reg);
10660             /* Make the operand addressable. When gtOverflow() is true,
10661                hold on to the addrReg as we will need it to access the higher dword */
10663             op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
10665             // See, e.g., the TYP_INT case below...
10667             addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
10669             /* Load the lower half of the value into some register */
10673                 /* Can we simply use the low part of the value? */
10674                 reg = genRegPairLo(op1->gtRegPair);
10676                 if (tree->gtOverflow())
10680                     loMask = genRegMask(reg);
10681                     if (loMask & regSet.rsRegMaskFree())
10685                 // for cast overflow we need to preserve addrReg for testing the hiDword
10686                 // so we lock it to prevent regSet.rsPickReg from picking it.
10687                 if (tree->gtOverflow())
10688                     regSet.rsLockUsedReg(addrReg);
10690                 reg = regSet.rsPickReg(needReg, bestReg);
10692                 if (tree->gtOverflow())
10693                     regSet.rsUnlockUsedReg(addrReg);
10695                 noway_assert(genStillAddressable(op1));
10698             if (!op1->InReg() || (reg != genRegPairLo(op1->gtRegPair)))
10700                 /* Generate "mov reg, [addr-mode]" */
10701                 inst_RV_TT(ins_Load(TYP_INT), reg, op1);
10704             /* conv.ovf.i8i4, or conv.ovf.u8u4 */
// Overflow-checked long->int: after the low dword is in 'reg', verify the
// high dword is consistent (0 or sign-extension of the low dword), throwing
// SCK_OVERFLOW otherwise.
10706             if (tree->gtOverflow())
// hiReg is REG_NA when the long is not in a register pair (i.e. in memory),
// in which case the high dword is compared via the address mode (inst_TT_IV).
10708                 regNumber hiReg = (op1->InReg()) ? genRegPairHi(op1->gtRegPair) : REG_NA;
10710                 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
10711                 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
10717                         /* Generate the following sequence
10719                                 test loDWord, loDWord   // set flags
10721                         pos: test hiDWord, hiDWord  // set flags
10724                         neg: cmp hiDWord, 0xFFFFFFFF
10730                         instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
10731                         if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
// Unsigned source: a negative low dword already overflows; otherwise only
// the "upper bits must be zero" check remains.
10733                             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
10734                             goto UPPER_BITS_ZERO;
10737 #if CPU_LOAD_STORE_ARCH
10739                         // We will generate code like
10748                         // We load the tree op1 into regs when we generate code for if clause.
10749                         // When we generate else clause, we see the tree is already loaded into reg, and start use it
10751                         // Well, when the code is run, we may execute else clause without going through if clause.
// Workaround for load/store architectures: force op1 into registers up-front
// so both branches of the generated if/else see it in the same place.
10753                         genCodeForTree(op1, 0);
10759                         neg = genCreateTempLabel();
10760                         done = genCreateTempLabel();
10762                         // Is the loDWord positive or negative
10763                         inst_JMP(jmpLTS, neg);
10765                         // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
10767                         if (hiReg < REG_STK)
10769                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
10773                             inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
10776                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10777                         inst_JMP(EJ_jmp, done);
10779                         // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
10781                         genDefineTempLabel(neg);
10783                         if (hiReg < REG_STK)
10785                             inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
10789                             inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
10791                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10795                         genDefineTempLabel(done);
10799                     case TYP_UINT: // conv.ovf.u8u4
10801                         // Just check that the upper DWord is 0
10803                         if (hiReg < REG_STK)
10805                             instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
10809                             inst_TT_IV(INS_cmp, op1, 0, 4);
10812                         genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
10816                         noway_assert(!"Unexpected dstType");
// Overflow path used KEEP_REG above, so it must release the address here.
10820                 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
10823             regTracker.rsTrackRegTrash(reg);
10824             genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
10826             genCodeForTree_DONE(tree, reg);
10840 #if FEATURE_STACK_FP_X87
// Source is floating point. With x87 FP, float sources should have been
// turned into helper calls before codegen.
10842                 NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
10846                 if (compiler->opts.compCanUseSSE2)
10848                     // do the SSE2 based cast inline
10849                     // getting the fp operand
10851                     regMaskTP addrRegInt = 0;
10852                     regMaskTP addrRegFlt = 0;
10854                     // make the operand addressable
10855                     // We don't want to collapse constant doubles into floats, as the SSE2 instruction
10856                     // operates on doubles. Note that these (casts from constant doubles) usually get
10857                     // folded, but we don't do it for some cases (infinitys, etc). So essentially this
10858                     // shouldn't affect performance or size at all. We're fixing this for #336067
10859                     op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
10860                     if (!addrRegFlt && !op1->IsRegVar())
10862                         // we have the address
// Fast path: operand is in memory — load straight into XMM0 and truncate.
10864                         inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
10865                         genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
10866                         genUpdateLife(op1);
10868                         reg = regSet.rsPickReg(needReg);
// cvttsd2si truncates toward zero (C-style conversion semantics).
10869                         getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
10871                         regTracker.rsTrackRegTrash(reg);
10872                         genCodeForTree_DONE(tree, reg);
10876                         // we will need to use a temp to get it into the xmm reg
// Slow path: value is on the x87 FP stack — spill it to a stack temp with
// fstp, then reload into XMM0 for the SSE2 truncating convert.
10877                         var_types typeTemp = op1->TypeGet();
10878                         TempDsc* temp = compiler->tmpGetTemp(typeTemp);
10880                         size = EA_ATTR(genTypeSize(typeTemp));
10884                             // On the fp stack; Take reg to top of stack
10886                             FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
10892                             reg = regSet.PickRegFloat();
10893                             if (!op1->IsRegVarDeath())
10895                                 // Load it on the fp stack
10896                                 genLoadStackFP(op1, reg);
10900                                 // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
10901                                 genLoadStackFP(op1, reg);
10902                                 FlatFPX87_MoveToTOS(&compCurFPState, reg);
10906                         // pop it off the fp stack
10907                         compCurFPState.Pop();
10909                         getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
10911                         reg = regSet.rsPickReg(needReg);
10913                         inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
10914                         getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
10916                         // done..release the temp
10917                         compiler->tmpRlsTemp(temp);
10919                         // the reg is now trashed
10920                         regTracker.rsTrackRegTrash(reg);
10921                         genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
10922                         genUpdateLife(op1);
10923                         genCodeForTree_DONE(tree, reg);
// Non-x87 targets: the generic float codegen handles the cast.
10929             genCodeForTreeFloat(tree, needReg, bestReg);
10930 #endif // FEATURE_STACK_FP_X87
10934             noway_assert(!"unexpected cast type");
// int -> smaller/different int with an overflow check: compute the value,
// then either test a mask (when only "upper bits must be zero" matters) or
// range-check against [typeMin, typeMax], throwing SCK_OVERFLOW on failure.
10937     if (tree->gtOverflow())
10939         /* Compute op1 into a register, and free the register */
10941         genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
10942         reg = op1->gtRegNum;
10944         /* Do we need to compare the value, or just check masks */
10946         ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
// Signed targets: typeMask covers the bits outside the target's positive
// range; unsigned sources can use a mask test, signed ones need min/max.
10952                 typeMask = ssize_t((int)0xFFFFFF80);
10953                 typeMin = SCHAR_MIN;
10954                 typeMax = SCHAR_MAX;
10955                 unsv = (tree->gtFlags & GTF_UNSIGNED);
10958                 typeMask = ssize_t((int)0xFFFF8000);
10959                 typeMin = SHRT_MIN;
10960                 typeMax = SHRT_MAX;
10961                 unsv = (tree->gtFlags & GTF_UNSIGNED);
10964                 typeMask = ssize_t((int)0x80000000L);
10965 #ifdef _TARGET_64BIT_
10966                 unsv = (tree->gtFlags & GTF_UNSIGNED);
10969 #else // _TARGET_64BIT_
// 32-bit: an overflow-checked cast to INT from an int source only arises for
// the unsigned variant (conv.ovf.i4.un).
10970                 noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
10972 #endif // _TARGET_64BIT_
// Unsigned targets: always a pure mask test (value must fit in the low bits).
10976                 typeMask = ssize_t((int)0xFFFFFF00L);
10980                 typeMask = ssize_t((int)0xFFFF0000L);
10984 #ifdef _TARGET_64BIT_
10985                 typeMask = 0xFFFFFFFF00000000LL;
10986 #else // _TARGET_64BIT_
10987                 typeMask = 0x80000000L;
10988                 noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
10989 #endif // _TARGET_64BIT_
10992                 NO_WAY("Unknown type");
10996         // If we just have to check a mask.
10997         // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
11002             inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
11003             emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
11004             genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
11008             // Check the value is in range.
11009             // This must be conv.ovf.i4i1, etc.
11011             // Compare with the MAX
11013             noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
11015             inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
11016             emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
11017             genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
11019             // Compare with the MIN
11021             inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
11022             emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
11023             genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
11026         genCodeForTree_DONE(tree, reg);
// Non-overflow int->int cast: delegate the extend/narrow to genIntegerCast.
11030     /* Make the operand addressable */
11032     addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
11034     reg = genIntegerCast(tree, needReg, bestReg);
11036     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11038     genCodeForTree_DONE(tree, reg);
11041 /*****************************************************************************
11043  *  Generate code for a leaf node of type GT_ADDR
// genCodeForTreeSmpOp_GT_ADDR: compute the address of tree's operand with an
// LEA into a scratch register and report it via genCodeForTree_DONE.
//
//  tree    - the GT_ADDR node (operand may be a GT_ARR_ELEM or any
//            addressable expression)
//  destReg - mask of registers the caller wants the address in
//  bestReg - preferred register mask; narrowed below toward the registers
//            already used to form the address mode
11046 void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
11048     genTreeOps oper = tree->OperGet();
11049     const var_types treeType = tree->TypeGet();
11052     regMaskTP needReg = destReg;
// Poison values so accidental use before assignment is detectable in a dump.
11056     reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use
11057     addrReg = 0xDEADCAFE;
11060     // We should get here for ldloca, ldarga, ldslfda, ldelema,
11062     if (oper == GT_ARR_ELEM)
11068         op1 = tree->gtOp.gtOp1;
11071     // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
11072     if (oper == GT_ARR_ELEM)
11074         // To get the address of the array element,
11075         // we first call genMakeAddrArrElem to make the element addressable.
11076         // (That is, for example, we first emit code to calculate EBX, and EAX.)
11077         // And then use lea to obtain the address.
11078         // (That is, for example, we then emit
11079         //     lea EBX, bword ptr [EBX+4*EAX+36]
11080         // to obtain the address of the array element.)
11081         addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
11085         addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
11088     noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
11090     // We want to reuse one of the scratch registers that were used
11091     // in forming the address mode as the target register for the lea.
11092     // If bestReg is unset or if it is set to one of the registers used to
11093     // form the address (i.e. addrReg), we calculate the scratch register
11094     // to use as the target register for the LEA
11096     bestReg = regSet.rsUseIfZero(bestReg, addrReg);
11097     bestReg = regSet.rsNarrowHint(bestReg, addrReg);
11099     /*  Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
11100        it since keepReg==false.
11101        If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
11102        So this is guaranteed not to spill addrReg */
11104     reg = regSet.rsPickReg(needReg, bestReg);
11106     // Slight workaround, force the inst routine to think that
11107     // value being loaded is an int (since that is what what
11108     // LEA will return) otherwise it would try to allocate
11109     // two registers for a long etc.
11110     noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
// Note: this mutates the operand's type in place; safe because only the
// address (not the value) of op1 is consumed from here on.
11111     op1->gtType = treeType;
11113     inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
11115     // The Lea instruction above better not have tried to put the
11116     // 'value' pointed to by 'op1' in a register, LEA will not work.
11117     noway_assert(!(op1->InReg()));
11119     genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
11120     // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
// The freshly computed address must not already be tracked as a GC ref.
11121     noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
11123     regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
// If treeType is TYP_BYREF this records the register as holding a byref for
// GC purposes.
11124     gcInfo.gcMarkRegPtrVal(reg, treeType);
11126     genCodeForTree_DONE(tree, reg);
11129 #ifdef _TARGET_ARM_
11131 /*****************************************************************************
11133  *  Move (load/store) between float ret regs and struct promoted variable.
11135  *      varDsc - The struct variable to be loaded from or stored into.
11136  *      isLoadIntoFlt - Perform a load operation if "true" or store if "false."
// ARM HFA support: walks the promoted struct's field locals in order,
// copying each between its home (register or stack slot) and the next
// floating-point return register(s), starting at REG_FLOATRET.
11139 void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
11141     regNumber curReg = REG_FLOATRET;
11143     unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
11144     for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
11146         LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
11148         // Is the struct field promoted and sitting in a register?
11149         if (varDscFld->lvRegister)
11151             // Move from the struct field into curReg if load
11152             // else move into struct field from curReg if store
11153             regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
11154             regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
// Skip the copy entirely when the field already lives in the target register.
11155             if (srcReg != dstReg)
11157                 inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
11158                 regTracker.rsTrackRegCopy(dstReg, srcReg);
11163             // This field is in memory, do a move between the field and float registers.
11164             emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
11167                 getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11168                 regTracker.rsTrackRegTrash(curReg);
11172                 getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
11176         // Advance the current reg.
// A double occupies two consecutive single-precision registers (e.g. d0 =
// s0:s1), hence the double REG_NEXT for TYP_DOUBLE fields.
11177         curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
// genLoadIntoFltRetRegs: load an HFA struct local (the operand of a return)
// into the floating-point return registers starting at REG_FLOATRET, then
// mark the tree as living in REG_FLOATRET.
//
//  tree - a TYP_STRUCT GT_LCL_VAR holding the value to be returned
11181 void CodeGen::genLoadIntoFltRetRegs(GenTree* tree)
11183     assert(tree->TypeGet() == TYP_STRUCT);
11184     assert(tree->gtOper == GT_LCL_VAR);
11185     LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
11186     int slots = varDsc->lvSize() / REGSIZE_BYTES;
11187     if (varDsc->lvPromoted)
// Promoted struct: fields may be enregistered, so move them one by one.
11189         genLdStFltRetRegsPromotedVar(varDsc, true);
11195         // Use the load float/double instruction.
// One- or two-slot struct in memory: a single vldr-style load suffices.
11196         inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
11197                    (slots == 1) ? EA_4BYTE : EA_8BYTE);
11201         // Use the load store multiple instruction.
// Larger struct: compute its address with LEA into a scratch integer
// register, then bulk-load the float registers with vldm.
11202         regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11203         inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
11204         regTracker.rsTrackRegTrash(reg);
11205         getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11208     genMarkTreeInReg(tree, REG_FLOATRET);
// genStoreFromFltRetRegs: generate code for "lclVar = (HFA) call()" — emit
// the call, then copy the HFA return value out of the floating-point return
// registers into the local variable on the left-hand side.
//
//  tree - a TYP_STRUCT GT_ASG whose LHS is a local (var or field) and whose
//         RHS is a GT_CALL returning an HFA
11211 void CodeGen::genStoreFromFltRetRegs(GenTree* tree)
11213     assert(tree->TypeGet() == TYP_STRUCT);
11214     assert(tree->OperGet() == GT_ASG);
11216     // LHS should be lcl var or fld.
11217     GenTree* op1 = tree->gtOp.gtOp1;
11219     // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
11220     // handling multiple levels of inlined functions that return HFA on the right-hand-side.
11221     // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
11222     // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
11223     // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
11224     // as a regular assert().
11225     noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
11226     unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11227     assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
11229     // The RHS should be a call.
11230     GenTree* op2 = tree->gtOp.gtOp2;
11231     assert(op2->gtOper == GT_CALL);
11233     // Generate code for call and copy the return registers into the local.
11234     regMaskTP retMask = genCodeForCall(op2->AsCall(), true);
11236     // Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3.
11237     CLANG_FORMAT_COMMENT_ANCHOR;
// Sanity checks: (retMask >> REG_FLOATRET) + 1 is a power of two exactly when
// the mask is a contiguous run of bits starting at REG_FLOATRET, and no bits
// below the float return registers may be set.
11240     regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
11241     assert((mask & (mask - 1)) == 0);
11242     assert(mask <= (1 << MAX_HFA_RET_SLOTS));
11243     assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
// Number of float return registers the call actually used.
11246     int slots = genCountBits(retMask & RBM_ALLFLOAT);
11248     LclVarDsc* varDsc = &compiler->lvaTable[varNum];
11250     if (varDsc->lvPromoted)
// Promoted destination: store field-by-field (fields may be enregistered).
11252         genLdStFltRetRegsPromotedVar(varDsc, false);
// One- or two-slot destination in memory: a single store covers it.
11258         inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
11259                    (slots == 1) ? EA_4BYTE : EA_8BYTE);
// Larger destination: take its address with LEA, then bulk-store with vstm.
11263         regNumber reg = regSet.rsPickReg(RBM_ALLINT);
11264         inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
11265         regTracker.rsTrackRegTrash(reg);
11266         getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
11271 #endif // _TARGET_ARM_
11273 /*****************************************************************************
11275 * Generate code for a GT_ASG tree
11279 #pragma warning(push)
11280 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
11282 void CodeGen::genCodeForTreeSmpOpAsg(GenTree* tree)
11284 noway_assert(tree->gtOper == GT_ASG);
11286 GenTree* op1 = tree->gtOp.gtOp1;
11287 GenTree* op2 = tree->gtOp.gtOp2;
11288 regMaskTP needReg = RBM_ALLINT;
11289 regMaskTP bestReg = RBM_CORRUPT;
11290 regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT);
11291 bool ovfl = false; // Do we need an overflow check
11292 bool volat = false; // Is this a volatile store
11295 unsigned lclVarNum = compiler->lvaCount;
11296 unsigned lclILoffs = DUMMY_INIT(0);
11298 #ifdef _TARGET_ARM_
11299 if (tree->gtType == TYP_STRUCT)
11301 // We use copy block to assign structs, however to receive HFAs in registers
11302 // from a CALL, we use assignment, var = (hfa) call();
11303 assert(compiler->IsHfa(tree));
11304 genStoreFromFltRetRegs(tree);
11310 if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
11312 if (varTypeIsFloating(op1))
11313 assert(!"Bad IL: Illegal assignment of integer into float!");
11315 assert(!"Bad IL: Illegal assignment of float into integer!");
11319 if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
11321 op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
11324 /* Is the target a register or local variable? */
11325 switch (op1->gtOper)
11331 varNum = op1->gtLclVarCommon.gtLclNum;
11332 noway_assert(varNum < compiler->lvaCount);
11333 varDsc = compiler->lvaTable + varNum;
11335 /* For non-debuggable code, every definition of a lcl-var has
11336 * to be checked to see if we need to open a new scope for it.
11337 * Remember the local var info to call siCheckVarScope
11338 * AFTER code generation of the assignment.
11340 if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
11342 lclVarNum = varNum;
11343 lclILoffs = op1->gtLclVar.gtLclILoffs;
11346 /* Check against dead store ? (with min opts we may have dead stores) */
11348 noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
11350 /* Does this variable live in a register? */
11352 if (genMarkLclVar(op1))
11359 /* Get hold of the target register */
11363 op1Reg = op1->gtRegVar.gtRegNum;
11366 /* Compute the RHS (hopefully) into the variable's register.
11367 For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
11368 as variables are kept alive everywhere. So we have to be
11369 careful if we want to compute the value directly into
11370 the variable's register. */
11372 bool needToUpdateRegSetCheckLevel;
11373 needToUpdateRegSetCheckLevel = false;
11376 // We should only be accessing lvVarIndex if varDsc is tracked.
11377 assert(varDsc->lvTracked);
11379 if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
11381 noway_assert(compiler->opts.compDbgCode);
11383 /* The predictor might expect us to generate op2 directly
11384 into the var's register. However, since the variable is
11385 already alive, first kill it and its register. */
11387 if (rpCanAsgOperWithoutReg(op2, true))
11389 genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
11390 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11392 needToUpdateRegSetCheckLevel = true;
11398 needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
11403 /* Special cases: op2 is a GT_CNS_INT */
11405 if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
11407 /* Save the old life status */
11409 VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
11410 VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
11412 /* Set a flag to avoid printing the message
11413 and remember that life was changed. */
11415 genTempLiveChg = false;
11420 if (needToUpdateRegSetCheckLevel)
11421 compiler->compRegSetCheckLevel++;
11423 genCodeForTree(op2, needReg, genRegMask(op1Reg));
11425 if (needToUpdateRegSetCheckLevel)
11426 compiler->compRegSetCheckLevel--;
11427 noway_assert(compiler->compRegSetCheckLevel >= 0);
11429 noway_assert(op2->InReg());
11431 /* Make sure the value ends up in the right place ... */
11433 if (op2->gtRegNum != op1Reg)
11435 /* Make sure the target of the store is available */
11437 if (regSet.rsMaskUsed & genRegMask(op1Reg))
11438 regSet.rsSpillReg(op1Reg);
11440 #ifdef _TARGET_ARM_
11441 if (op1->TypeGet() == TYP_FLOAT)
11443 // This can only occur when we are returning a non-HFA struct
11444 // that is composed of a single float field.
11446 inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
11449 #endif // _TARGET_ARM_
11451 inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
11454 /* The value has been transferred to 'op1Reg' */
11456 regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
11458 if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
11459 gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
11461 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11465 // First we need to remove it from the original reg set mask (or else trigger an
11466 // assert when we add it to the other reg set mask).
11467 gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
11468 gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
11470 // The emitter has logic that tracks the GCness of registers and asserts if you
11471 // try to do bad things to a GC pointer (like lose its GCness).
11473 // An explict cast of a GC pointer to an int (which is legal if the
11474 // pointer is pinned) is encoded as an assignment of a GC source
11475 // to a integer variable. Unfortunately if the source was the last
11476 // use, and the source register gets reused by the destination, no
11477 // code gets emitted (That is where we are at right now). The emitter
11478 // thinks the register is a GC pointer (it did not see the cast).
11479 // This causes asserts, as well as bad GC info since we will continue
11480 // to report the register as a GC pointer even if we do arithmetic
11481 // with it. So force the emitter to see the change in the type
11482 // of variable by placing a label.
11483 // We only have to do this check at this point because in the
11484 // CAST morphing, we create a temp and assignment whenever we
11485 // have a cast that loses its GCness.
11487 if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
11489 void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
11490 gcInfo.gcRegByrefSetCur);
11496 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
11501 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
11502 // to worry about it being enregistered.
11503 noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
11513 assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
11515 if (op1->gtFlags & GTF_IND_VOLATILE)
11526 /* Is the value being assigned a simple one? */
11529 switch (op2->gtOper)
11533 if (!genMarkLclVar(op2))
11540 /* Is the target a byte/short/char value? */
11542 if (varTypeIsSmall(op1->TypeGet()))
11545 if (tree->gtFlags & GTF_REVERSE_OPS)
11548 /* Make the target addressable */
11550 op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
11552 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11554 /* Does the write barrier helper do the assignment? */
11556 regGC = WriteBarrier(op1, op2, addrReg);
11558 // Was assignment done by the WriteBarrier
11559 if (regGC == RBM_NONE)
11561 #ifdef _TARGET_ARM_
11564 // Emit a memory barrier instruction before the store
11565 instGen_MemoryBarrier();
11569 /* Move the value into the target */
11571 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
11573 // This is done in WriteBarrier when (regGC != RBM_NONE)
11575 /* Free up anything that was tied up by the LHS */
11576 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11579 /* Free up the RHS */
11580 genUpdateLife(op2);
11582 /* Remember that we've also touched the op2 register */
11584 addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
11589 GenTreeIntConCommon* con;
11590 con = op2->AsIntConCommon();
11592 ival = con->IconValue();
11594 size = emitTypeSize(tree->TypeGet());
11596 ins = ins_Store(op1->TypeGet());
11598 // If we are storing a constant into a local variable
11599 // we extend the size of the store here
11600 // this normally takes place in CodeGen::inst_TT_IV on x86.
11602 if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
11604 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
11605 LclVarDsc* varDsc = compiler->lvaTable + varNum;
11607 // Fix the immediate by sign extending if needed
11608 if (!varTypeIsUnsigned(varDsc->TypeGet()))
11610 if (size == EA_1BYTE)
11612 if ((ival & 0x7f) != ival)
11613 ival = ival | 0xffffff00;
11617 assert(size == EA_2BYTE);
11618 if ((ival & 0x7fff) != ival)
11619 ival = ival | 0xffff0000;
11623 // A local stack slot is at least 4 bytes in size, regardless of
11624 // what the local var is typed as, so auto-promote it here
11625 // unless it is a field of a promoted struct
11626 if (!varDsc->lvIsStructField)
11628 size = EA_SET_SIZE(size, EA_4BYTE);
11629 ins = ins_Store(TYP_INT);
11633 /* Make the target addressable */
11635 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
11637 #ifdef _TARGET_ARM_
11640 // Emit a memory barrier instruction before the store
11641 instGen_MemoryBarrier();
11645 /* Move the value into the target */
11647 noway_assert(op1->gtOper != GT_REG_VAR);
11648 if (con->ImmedValNeedsReloc(compiler))
11650 /* The constant is actually a handle that may need relocation
11651 applied to it. genComputeReg will do the right thing (see
11652 code in genCodeForTreeConst), so we'll just call it to load
11653 the constant into a register. */
11655 genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
11656 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11657 noway_assert(op2->InReg());
11658 inst_TT_RV(ins, op1, op2->gtRegNum);
11659 genReleaseReg(op2);
11663 regSet.rsLockUsedReg(addrReg);
11666 bool copyIconFromReg = true;
11667 regNumber iconReg = REG_NA;
11669 #ifdef _TARGET_ARM_
11670 // Only if the constant can't be encoded in a small instruction,
11671 // look for another register to copy the value from. (Assumes
11672 // target is a small register.)
11673 if ((op1->InReg()) && !isRegPairType(tree->gtType) &&
11674 arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
11676 copyIconFromReg = false;
11678 #endif // _TARGET_ARM_
11680 if (copyIconFromReg)
11682 iconReg = regTracker.rsIconIsInReg(ival);
11683 if (iconReg == REG_NA)
11684 copyIconFromReg = false;
11687 if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
11688 (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
11690 /* Move the value into the target */
11692 inst_TT_RV(ins, op1, iconReg, 0, size);
11695 #endif // REDUNDANT_LOAD
11697 inst_TT_IV(ins, op1, ival, 0, size);
11700 regSet.rsUnlockUsedReg(addrReg);
11703 /* Free up anything that was tied up by the LHS */
11705 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11712 bool isWriteBarrier = false;
11713 regMaskTP needRegOp1 = RBM_ALLINT;
11714 RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
11716 /* Is the LHS more complex than the RHS? */
11718 if (tree->gtFlags & GTF_REVERSE_OPS)
11720 /* Is the target a byte/short/char value? */
11722 if (varTypeIsSmall(op1->TypeGet()))
11724 noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
11725 // TODO: Why does this have to be true?
11726 compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
11727 compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
11729 if (op2->gtOper == GT_CAST && !op2->gtOverflow())
11731 /* Special case: cast to small type */
11733 if (op2->CastToType() >= op1->gtType)
11735 /* Make sure the cast operand is not > int */
11737 if (op2->CastFromType() <= TYP_INT)
11739 /* Cast via a non-smaller type */
11741 op2 = op2->gtCast.CastOp();
11746 if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
11749 switch (op1->gtType)
11764 if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
11766 /* Redundant AND */
11768 op2 = op2->gtOp.gtOp1;
11772 /* Must get the new value into a byte register */
11775 if (varTypeIsByte(op1->TypeGet()))
11776 genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
11783 /* Generate the RHS into a register */
11785 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11786 if (isWriteBarrier)
11788 #if NOGC_WRITE_BARRIERS
11789 // Exclude the REG_WRITE_BARRIER from op2's needReg mask
11790 needReg = Target::exclude_WriteBarrierReg(needReg);
11791 mustReg = RegSet::EXACT_REG;
11792 #else // !NOGC_WRITE_BARRIERS
11793 // This code should be generic across architectures.
11795 // For the standard JIT Helper calls
11796 // op1 goes into REG_ARG_0 and
11797 // op2 goes into REG_ARG_1
11799 needRegOp1 = RBM_ARG_0;
11800 needReg = RBM_ARG_1;
11801 #endif // !NOGC_WRITE_BARRIERS
11803 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11806 noway_assert(op2->InReg());
11808 /* Make the target addressable */
11810 op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
11811 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11813 /* Make sure the RHS register hasn't been spilled;
11814 keep the register marked as "used", otherwise
11815 we might get the pointer lifetimes wrong.
11818 if (varTypeIsByte(op1->TypeGet()))
11819 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11821 genRecoverReg(op2, needReg, RegSet::KEEP_REG);
11822 noway_assert(op2->InReg());
11824 /* Lock the RHS temporarily (lock only already used) */
11826 regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
11828 /* Make sure the LHS is still addressable */
11830 addrReg = genKeepAddressable(op1, addrReg);
11832 /* We can unlock (only already used ) the RHS register */
11834 regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
11836 /* Does the write barrier helper do the assignment? */
11838 regGC = WriteBarrier(op1, op2, addrReg);
11842 // Yes, assignment done by the WriteBarrier
11843 noway_assert(isWriteBarrier);
11847 #ifdef _TARGET_ARM_
11850 // Emit a memory barrier instruction before the store
11851 instGen_MemoryBarrier();
11855 /* Move the value into the target */
11857 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11861 /* Update the current liveness info */
11862 if (compiler->opts.varNames)
11863 genUpdateLife(tree);
11866 // If op2 register is still in use, free it. (Might not be in use, if
11867 // a full-call write barrier was done, and the register was a caller-saved
11869 regMaskTP op2RM = genRegMask(op2->gtRegNum);
11870 if (op2RM & regSet.rsMaskUsed)
11871 regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
11873 // This is done in WriteBarrier when (regGC != 0)
11876 /* Free up anything that was tied up by the LHS */
11877 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11882 /* Make the target addressable */
11884 isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
11886 if (isWriteBarrier)
11888 #if NOGC_WRITE_BARRIERS
11889 /* Try to avoid RBM_TMP_0 */
11890 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
11891 mustReg = RegSet::EXACT_REG; // For op2
11892 #else // !NOGC_WRITE_BARRIERS
11893 // This code should be generic across architectures.
11895 // For the standard JIT Helper calls
11896 // op1 goes into REG_ARG_0 and
11897 // op2 goes into REG_ARG_1
11899 needRegOp1 = RBM_ARG_0;
11900 needReg = RBM_ARG_1;
11901 mustReg = RegSet::EXACT_REG; // For op2
11902 #endif // !NOGC_WRITE_BARRIERS
11905 needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
11907 op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
11909 addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
11911 #if CPU_HAS_BYTE_REGS
11912 /* Is the target a byte value? */
11913 if (varTypeIsByte(op1->TypeGet()))
11915 /* Must get the new value into a byte register */
11916 needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
11917 mustReg = RegSet::EXACT_REG;
11919 if (op2->gtType >= op1->gtType)
11920 op2->gtFlags |= GTF_SMALL_OK;
11924 #if NOGC_WRITE_BARRIERS
11925 /* For WriteBarrier we can't use REG_WRITE_BARRIER */
11926 if (isWriteBarrier)
11927 needReg = Target::exclude_WriteBarrierReg(needReg);
11929 /* Also avoid using the previously computed addrReg(s) */
11930 bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
11932 /* If we have a reg available to grab then use bestReg */
11933 if (bestReg & regSet.rsRegMaskCanGrab())
11936 mustReg = RegSet::EXACT_REG;
11939 /* Generate the RHS into a register */
11940 genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
11941 noway_assert(op2->InReg());
11943 /* Make sure the target is still addressable */
11944 addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
11945 noway_assert(op2->InReg());
11947 /* Does the write barrier helper do the assignment? */
11949 regGC = WriteBarrier(op1, op2, addrReg);
11953 // Yes, assignment done by the WriteBarrier
11954 noway_assert(isWriteBarrier);
11958 assert(!isWriteBarrier);
11960 #ifdef _TARGET_ARM_
11963 // Emit a memory barrier instruction before the store
11964 instGen_MemoryBarrier();
11968 /* Move the value into the target */
11970 inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
11973 /* The new value is no longer needed */
11975 genReleaseReg(op2);
11978 /* Update the current liveness info */
11979 if (compiler->opts.varNames)
11980 genUpdateLife(tree);
11983 // This is done in WriteBarrier when (regGC != 0)
11986 /* Free up anything that was tied up by the LHS */
11987 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
11991 addrReg = RBM_NONE;
11995 noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
11996 genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
11999 /* For non-debuggable code, every definition of a lcl-var has
12000 * to be checked to see if we need to open a new scope for it.
12002 if (lclVarNum < compiler->lvaCount)
12003 siCheckVarScope(lclVarNum, lclILoffs);
12006 #pragma warning(pop)
12009 /*****************************************************************************
12011 * Generate code to complete the assignment operation
/*****************************************************************************
 *
 *  Finish off an assignment: update liveness and register tracking for the
 *  target and the whole tree, note that the address registers no longer hold
 *  live GC pointers, and — for overflow-checked GT_ASG_ADD/GT_ASG_SUB — emit
 *  the overflow check.
 *
 *  tree    - the assignment node just generated
 *  addrReg - mask of registers that were tied up addressing the target
 *  reg     - register holding the result (REG_NA if none)
 *  ovfl    - true when an overflow check is required
 */
12014 void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTree* tree, regMaskTP addrReg, regNumber reg, bool ovfl)
12016 const var_types treeType = tree->TypeGet();
12017 GenTree* op1 = tree->gtOp.gtOp1;
12018 GenTree* op2 = tree->gtOp.gtOp2;
// Update liveness for the target (when it is a local) and then for the tree
12021 if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
12022 genUpdateLife(op1);
12023 genUpdateLife(tree);
// Any register the tracker thought held the old value of this local is stale now
12027 if (op1->gtOper == GT_LCL_VAR)
12028 regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
12030 /* Have we just assigned a value that is in a register? */
12032 if (op2->InReg() && tree->gtOper == GT_ASG)
12034 regTracker.rsTrackRegAssign(op1, op2);
// Guard against a caller passing an uninitialized addrReg (debug poison value)
12039 noway_assert(addrReg != 0xDEADCAFE);
// The addressing registers no longer hold live GC pointers
12041 gcInfo.gcMarkRegSetNpt(addrReg);
// Overflow path: only ASG_ADD/ASG_SUB can reach here with 'ovfl' set
12045 noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
12047 /* If it is not in a register and it is a small type, then
12048 we must have loaded it up from memory, done the increment,
12049 checked for overflow, and then stored it back to memory */
12051 bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->InReg());
12055 // For small sizes, reg should be set as we sign/zero extend it.
12057 noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
12059 /* Currently we don't morph x=x+y into x+=y in try blocks
12060 * if we need overflow check, as x+y may throw an exception.
12061 * We can do it if x is not live on entry to the catch block.
12063 noway_assert(!compiler->compCurBB->hasTryIndex());
12065 genCheckOverflow(tree);
12070 /*****************************************************************************
12072 * Generate code for a special op tree
/*****************************************************************************
 *
 *  Generate code for a "special" operator — one that is neither a constant,
 *  a leaf, nor a simple unary/binary op (see the OperKind assert below).
 *  Visible cases handle calls, bounds checks, GT_ADDR, and GT_CMPXCHG.
 *
 *  tree    - the node to generate code for
 *  destReg - preferred register mask for the result (0 means "any")
 *  bestReg - narrower hint within destReg
 */
12075 void CodeGen::genCodeForTreeSpecialOp(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
12077 genTreeOps oper = tree->OperGet();
12078 regNumber reg = DUMMY_INIT(REG_CORRUPT);
12079 regMaskTP regs = regSet.rsMaskUsed;
// This routine only handles operators that are not const/leaf/simple ops
12081 noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
12086 regs = genCodeForCall(tree->AsCall(), true);
12088 /* If the result is in a register, make sure it ends up in the right place */
12090 if (regs != RBM_NONE)
12092 genMarkTreeInReg(tree, genRegNumFromMask(regs));
12095 genUpdateLife(tree);
12099 NO_WAY("should not see this operator in this phase");
12102 case GT_ARR_BOUNDS_CHECK:
12104 #ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
12105 // MUST NEVER CHECK-IN WITH THIS ENABLED.
12106 // This is just for convenience in doing performance investigations and requires x86ret builds
12107 if (!JitConfig.JitNoRngChk())
12109 genRangeCheck(tree);
12114 genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
12119 #if defined(_TARGET_XARCH_)
12120 // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
12122 // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
12123 // right now, just get the trees evaluated.
12125 // As a friendly reminder. IL args are evaluated left to right.
12127 GenTree* location = tree->gtCmpXchg.gtOpLocation; // arg1
12128 GenTree* value = tree->gtCmpXchg.gtOpValue; // arg2
12129 GenTree* comparand = tree->gtCmpXchg.gtOpComparand; // arg3
// Try to form an indirect addressing mode for the location first
12132 bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
12133 RBM_ALLINT, RegSet::KEEP_REG, &addrReg);
// Fall back: materialize the address into a register and keep it live
12137 genCodeForTree(location, RBM_NONE, RBM_NONE);
12138 assert(location->InReg());
12139 addrReg = genRegMask(location->gtRegNum);
12140 regSet.rsMarkRegUsed(location);
12143 // We must have a reg for the Value, but it doesn't really matter which register.
12145 // Try to avoid EAX and the address register if possible.
12146 genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
12149 // cmpxchg uses EAX as an implicit operand to hold the comparand
12150 // We're going to destroy EAX in this operation, so we better not be keeping
12151 // anything important in it.
12152 if (RBM_EAX & regSet.rsMaskVars)
12154 // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
12155 for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
12157 const LclVarDsc& varDesc = compiler->lvaTable[varNum];
12158 if (!varDesc.lvIsRegCandidate())
12160 if (!varDesc.lvRegister)
12162 if (isFloatRegType(varDesc.lvType))
12164 if (varDesc.lvRegNum != REG_EAX)
12166 // We may need to check lvOtherReg.
12168 // If the variable isn't going dead during this tree, we've just trashed a local with
12170 noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
// The comparand must be in EAX: cmpxchg's implicit operand
12176 genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
12178 // By this point we've evaluated everything. However the odds are that we've spilled something by
12179 // now. Let's recover all the registers and force them to stay.
12181 // Well, we just computed comparand, so it's still in EAX.
12182 noway_assert(comparand->gtRegNum == REG_EAX);
12183 regSet.rsLockUsedReg(RBM_EAX);
12185 // Stick it anywhere other than EAX.
12186 genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
12187 reg = value->gtRegNum;
12188 noway_assert(reg != REG_EAX);
12189 regSet.rsLockUsedReg(genRegMask(reg));
// Re-validate the addressing mode now that EAX and the value reg are locked
12193 addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
12197 genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
// Unlock in reverse order of locking before emitting the instruction
12200 regSet.rsUnlockUsedReg(genRegMask(reg));
12201 regSet.rsUnlockUsedReg(RBM_EAX);
12206 sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
12207 genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
12211 instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
12212 genReleaseReg(location);
12215 genReleaseReg(value);
12216 genReleaseReg(comparand);
12218 // EAX and the value register are both trashed at this point.
12219 regTracker.rsTrackRegTrash(REG_EAX);
12220 regTracker.rsTrackRegTrash(reg);
// cmpxchg sets the condition flags, so any cached flags state is invalid
12224 genFlagsEqualToNone();
12226 #else // not defined(_TARGET_XARCH_)
12227 NYI("GT_CMPXCHG codegen");
12234 compiler->gtDispTree(tree);
12236 noway_assert(!"unexpected operator");
12237 NO_WAY("unexpected operator");
// 'reg' must have been assigned by some case above
12240 noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
12241 genCodeForTree_DONE(tree, reg);
12244 /*****************************************************************************
12246 * Generate code for the given tree. tree->gtRegNum will be set to the
12247 * register where the tree lives.
12249 * If 'destReg' is non-zero, we'll do our best to compute the value into a
12250 * register that is in that register set.
12251 * Use genComputeReg() if you need the tree in a specific register.
12252 * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
12253 * the register can only be used for read, but not for write.
12254 * Use genMakeAddressable() if you only need the tree to be accessible
12255 * using a complex addressing mode, and do not necessarily need the tree
12256 * materialized in a register.
12258 * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
12260 * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
12261 * register will not be consumed right away and could possibly be spilled.
// Top-level code generation dispatch for a tree: filters the destReg/bestReg
// hints against currently-live enregistered locals, routes long/float/double
// trees to their own codegen routines, then dispatches on the node's OperKind
// (const / leaf / simple op / special op). See the comment block above for
// the register-hinting contract.
12264 void CodeGen::genCodeForTree(GenTree* tree, regMaskTP destReg, regMaskTP bestReg)
12267 if (compiler->verbose)
12269 printf("Generating code for tree ");
12270 Compiler::printTreeID(tree);
12271 printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
12273 genStressRegs(tree);
12276 noway_assert(tree);
12277 noway_assert(tree->gtOper != GT_STMT);
12278 assert(tree->IsNodeProperlySized());
12280 // When assigning to an enregistered local variable we receive
12281 // a hint that we should target the register that is used to
12282 // hold the enregistered local variable.
12283 // When receiving this hint both destReg and bestReg masks are set
12284 // to the register that is used by the enregistered local variable.
12286 // However it is possible for us to have a different local variable
12287 // targeting the same register to become alive (and later die)
12288 // as we descend the expression tree.
12290 // To handle such cases we will remove any registers that are alive from
12291 // both the destReg and bestReg masks.
12293 regMaskTP liveMask = genLiveMask(tree);
12295 // This removes any registers used to hold enregistered locals
12296 // from the destReg and bestReg masks.
12297 // After this either mask could become 0
12299 destReg &= ~liveMask;
12300 bestReg &= ~liveMask;
12302 /* 'destReg' of 0 really means 'any' */
12304 destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
12306 if (destReg != RBM_ALL(tree->TypeGet()))
12307 bestReg = regSet.rsUseIfZero(bestReg, destReg);
12309 // Long, float, and double have their own codegen functions
12310 switch (tree->TypeGet())
12314 #if !CPU_HAS_FP_SUPPORT
12317 genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
12320 #if CPU_HAS_FP_SUPPORT
12324 // For comma nodes, we'll get back here for the last node in the comma list.
12325 if (tree->gtOper != GT_COMMA)
12327 genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
12336 noway_assert(!"These types are only used as markers in GT_CAST nodes");
12344 /* Is the value already in a register? */
12348 genCodeForTree_REG_VAR1(tree);
12352 /* We better not have a spilled value here */
12354 noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
12356 /* Figure out what kind of a node we have */
12358 unsigned kind = tree->OperKind();
12360 if (kind & GTK_CONST)
12362 /* Handle constant nodes */
12364 genCodeForTreeConst(tree, destReg, bestReg);
12366 else if (kind & GTK_LEAF)
12368 /* Handle leaf nodes */
12370 genCodeForTreeLeaf(tree, destReg, bestReg);
12372 else if (kind & GTK_SMPOP)
12374 /* Handle 'simple' unary/binary operators */
12376 genCodeForTreeSmpOp(tree, destReg, bestReg);
12380 /* Handle special operators */
12382 genCodeForTreeSpecialOp(tree, destReg, bestReg);
12386 /*****************************************************************************
12388 * Generate code for all the basic blocks in the function.
12392 #pragma warning(push)
12393 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
12395 void CodeGen::genCodeForBBlist()
12400 unsigned savedStkLvl;
12403 genInterruptibleUsed = true;
12404 unsigned stmtNum = 0;
12405 unsigned totalCostEx = 0;
12406 unsigned totalCostSz = 0;
12408 // You have to be careful if you create basic blocks from now on
12409 compiler->fgSafeBasicBlockCreation = false;
12411 // This stress mode is not compatible with fully interruptible GC
12412 if (genInterruptible && compiler->opts.compStackCheckOnCall)
12414 compiler->opts.compStackCheckOnCall = false;
12417 // This stress mode is not compatible with fully interruptible GC
12418 if (genInterruptible && compiler->opts.compStackCheckOnRet)
12420 compiler->opts.compStackCheckOnRet = false;
12424 // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
12425 genPrepForEHCodegen();
12427 assert(!compiler->fgFirstBBScratch ||
12428 compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
12430 /* Initialize the spill tracking logic */
12432 regSet.rsSpillBeg();
12434 /* Initialize the line# tracking logic */
12436 if (compiler->opts.compScopeInfo)
12441 #ifdef _TARGET_X86_
12442 if (compiler->compTailCallUsed)
12444 noway_assert(isFramePointerUsed());
12445 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12449 if (compiler->opts.compDbgEnC)
12451 noway_assert(isFramePointerUsed());
12452 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12455 /* If we have any pinvoke calls, we might potentially trash everything */
12457 if (compiler->info.compCallUnmanaged)
12459 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
12460 regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
12463 /* Initialize the pointer tracking code */
12465 gcInfo.gcRegPtrSetInit();
12466 gcInfo.gcVarPtrSetInit();
12468 /* If any arguments live in registers, mark those regs as such */
12470 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
12472 /* Is this variable a parameter assigned to a register? */
12474 if (!varDsc->lvIsParam || !varDsc->lvRegister)
12477 /* Is the argument live on entry to the method? */
12479 if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
12482 #if CPU_HAS_FP_SUPPORT
12483 /* Is this a floating-point argument? */
12485 if (varDsc->IsFloatRegType())
12488 noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
12491 /* Mark the register as holding the variable */
12493 if (isRegPairType(varDsc->lvType))
12495 regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
12497 if (varDsc->lvOtherReg != REG_STK)
12498 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12502 regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
12506 unsigned finallyNesting = 0;
12508 // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
12509 // allocation at the start of each basic block.
12510 VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
12512 /*-------------------------------------------------------------------------
12514 * Walk the basic blocks and generate code for each one
12519 BasicBlock* lblk; /* previous block */
12521 for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
12524 if (compiler->verbose)
12526 printf("\n=============== Generating ");
12527 block->dspBlockHeader(compiler, true, true);
12528 compiler->fgDispBBLiveness(block);
12532 VARSET_TP liveSet(VarSetOps::UninitVal());
12534 regMaskTP gcrefRegs = 0;
12535 regMaskTP byrefRegs = 0;
12537 /* Does any other block jump to this point ? */
12539 if (block->bbFlags & BBF_JMP_TARGET)
12541 /* Someone may jump here, so trash all regs */
12543 regTracker.rsTrackRegClr();
12545 genFlagsEqualToNone();
12549 /* No jump, but pointers always need to get trashed for proper GC tracking */
12551 regTracker.rsTrackRegClrPtr();
12554 /* No registers are used or locked on entry to a basic block */
12556 regSet.rsMaskUsed = RBM_NONE;
12557 regSet.rsMaskMult = RBM_NONE;
12558 regSet.rsMaskLock = RBM_NONE;
12560 // If we need to reserve registers such that they are not used
12561 // by CodeGen in this BasicBlock we do so here.
12562 // On the ARM when we have large frame offsets for locals we
12563 // will have RBM_R10 in the regSet.rsMaskResvd set,
12564 // additionally if a LocAlloc or alloca is used RBM_R9 is in
12565 // the regSet.rsMaskResvd set and we lock these registers here.
12567 if (regSet.rsMaskResvd != RBM_NONE)
12569 regSet.rsLockReg(regSet.rsMaskResvd);
12570 regSet.rsSetRegsModified(regSet.rsMaskResvd);
12573 /* Figure out which registers hold variables on entry to this block */
12575 regMaskTP specialUseMask = regSet.rsMaskResvd;
12577 specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
12578 regSet.ClearMaskVars();
12579 VarSetOps::ClearD(compiler, compiler->compCurLife);
12580 VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
12582 #if FEATURE_STACK_FP_X87
12583 VarSetOps::AssignNoCopy(compiler, genFPregVars,
12584 VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
12585 genFPregCnt = VarSetOps::Count(compiler, genFPregVars);
12586 genFPdeadRegCnt = 0;
12588 gcInfo.gcResetForBB();
12590 genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
12591 #if FEATURE_STACK_FP_X87
12592 VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
12595 // We should never enregister variables in any of the specialUseMask registers
12596 noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
12598 VarSetOps::Iter iter(compiler, liveSet);
12599 unsigned varIndex = 0;
12600 while (iter.NextElem(&varIndex))
12602 varNum = compiler->lvaTrackedToVarNum[varIndex];
12603 varDsc = compiler->lvaTable + varNum;
12604 assert(varDsc->lvTracked);
12605 /* Ignore the variable if it's not in a reg */
12607 if (!varDsc->lvRegister)
12609 if (isFloatRegType(varDsc->lvType))
12612 /* Get hold of the index and the bitmask for the variable */
12613 regNumber regNum = varDsc->lvRegNum;
12614 regMaskTP regMask = genRegMask(regNum);
12616 regSet.AddMaskVars(regMask);
12618 if (varDsc->lvType == TYP_REF)
12619 gcrefRegs |= regMask;
12620 else if (varDsc->lvType == TYP_BYREF)
12621 byrefRegs |= regMask;
12623 /* Mark the register holding the variable as such */
12625 if (varTypeIsMultiReg(varDsc))
12627 regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
12628 if (varDsc->lvOtherReg != REG_STK)
12630 regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
12631 regMask |= genRegMask(varDsc->lvOtherReg);
12636 regTracker.rsTrackRegLclVar(regNum, varNum);
12640 gcInfo.gcPtrArgCnt = 0;
12642 #if FEATURE_STACK_FP_X87
12644 regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
12646 memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
12647 memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
12649 // Setup fp state on block entry
12650 genSetupStateStackFP(block);
12653 if (compiler->verbose)
12658 #endif // FEATURE_STACK_FP_X87
12660 /* Make sure we keep track of what pointers are live */
12662 noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
12663 gcInfo.gcRegGCrefSetCur = gcrefRegs;
12664 gcInfo.gcRegByrefSetCur = byrefRegs;
12666 /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
12667 represent the exception object (TYP_REF).
12668 We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
12669 to the block, it will be the first thing evaluated
12670 (thanks to GTF_ORDER_SIDEEFF).
12673 if (handlerGetsXcptnObj(block->bbCatchTyp))
12675 GenTree* firstStmt = block->FirstNonPhiDef();
12676 if (firstStmt != NULL)
12678 GenTree* firstTree = firstStmt->gtStmt.gtStmtExpr;
12679 if (compiler->gtHasCatchArg(firstTree))
12681 gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
12686 /* Start a new code output block */
12687 CLANG_FORMAT_COMMENT_ANCHOR;
12689 #if FEATURE_EH_FUNCLETS
12690 #if defined(_TARGET_ARM_)
12691 genInsertNopForUnwinder(block);
12692 #endif // defined(_TARGET_ARM_)
12694 genUpdateCurrentFunclet(block);
12695 #endif // FEATURE_EH_FUNCLETS
12697 #ifdef _TARGET_XARCH_
12698 if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
12700 getEmitter()->emitLoopAlign();
12705 if (compiler->opts.dspCode)
12706 printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
12709 block->bbEmitCookie = NULL;
12711 if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
12713 /* Mark a label and update the current set of live GC refs */
12715 block->bbEmitCookie =
12716 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
12717 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
12718 /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
12725 if (block == compiler->fgFirstColdBlock)
12728 if (compiler->verbose)
12730 printf("\nThis is the start of the cold region of the method\n");
12733 // We should never have a block that falls through into the Cold section
12734 noway_assert(!lblk->bbFallsThrough());
12736 // We require the block that starts the Cold section to have a label
12737 noway_assert(block->bbEmitCookie);
12738 getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
12741 /* Both stacks are always empty on entry to a basic block */
12744 #if FEATURE_STACK_FP_X87
12745 genResetFPstkLevel();
12746 #endif // FEATURE_STACK_FP_X87
12748 genAdjustStackLevel(block);
12750 savedStkLvl = genStackLevel;
12752 /* Tell everyone which basic block we're working on */
12754 compiler->compCurBB = block;
12756 siBeginBlock(block);
12758 // BBF_INTERNAL blocks don't correspond to any single IL instruction.
12759 if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
12760 genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
12762 bool firstMapping = true;
12764 /*---------------------------------------------------------------------
12766 * Generate code for each statement-tree in the block
12769 CLANG_FORMAT_COMMENT_ANCHOR;
12771 #if FEATURE_EH_FUNCLETS
12772 if (block->bbFlags & BBF_FUNCLET_BEG)
12774 genReserveFuncletProlog(block);
12776 #endif // FEATURE_EH_FUNCLETS
12778 for (GenTree* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
12780 noway_assert(stmt->gtOper == GT_STMT);
12782 /* Do we have a new IL-offset ? */
12784 if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
12786 /* Create and append a new IP-mapping entry */
12787 genIPmappingAdd(stmt->gtStmt.gtStmt.gtStmtILoffsx, firstMapping);
12788 firstMapping = false;
12792 if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
12794 noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
12795 if (compiler->opts.dspCode && compiler->opts.dspInstrs)
12797 while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
12799 genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
12805 /* Get hold of the statement tree */
12806 GenTree* tree = stmt->gtStmt.gtStmtExpr;
12810 if (compiler->verbose)
12812 printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
12813 printf("Holding variables: ");
12814 dspRegMask(regSet.rsMaskVars);
12816 compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
12818 #if FEATURE_STACK_FP_X87
12822 printf("Execution Order:\n");
12823 for (GenTree* treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
12825 compiler->gtDispTree(treeNode, 0, NULL, true);
12829 totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
12830 totalCostSz += stmt->gtCostSz;
12833 compiler->compCurStmt = stmt;
12835 compiler->compCurLifeTree = NULL;
12836 switch (tree->gtOper)
12839 // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
12840 // reported as alive even though not used within the caller for managed debugger sake. So
12841 // consider the return value of the method as used if generating debuggable code.
12842 genCodeForCall(tree->AsCall(), compiler->opts.MinOpts() || compiler->opts.compDbgCode);
12843 genUpdateLife(tree);
12844 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
12850 // Just do the side effects
12851 genEvalSideEffects(tree);
12855 /* Generate code for the tree */
12857 genCodeForTree(tree, 0);
12861 regSet.rsSpillChk();
12863 /* The value of the tree isn't used, unless it's a return stmt */
12865 if (tree->gtOper != GT_RETURN)
12866 gcInfo.gcMarkRegPtrVal(tree);
12868 #if FEATURE_STACK_FP_X87
12869 genEndOfStatement();
12873 /* Make sure we didn't bungle pointer register tracking */
12875 regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
12876 regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
12878 // If return is a GC-type, clear it. Note that if a common
12879 // epilog is generated (compiler->genReturnBB) it has a void return
12880 // even though we might return a ref. We can't use the compRetType
12881 // as the determiner because something we are tracking as a byref
12882 // might be used as a return value of a int function (which is legal)
12883 if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
12884 (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
12886 nonVarPtrRegs &= ~RBM_INTRET;
12889 // When profiling, the first statement in a catch block will be the
12890 // harmless "inc" instruction (does not interfere with the exception
12893 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && (stmt == block->bbTreeList) &&
12894 (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
12896 nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
12901 printf("Regset after tree=");
12902 Compiler::printTreeID(tree);
12903 printf(" BB%02u gcr=", block->bbNum);
12904 printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
12905 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
12907 printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
12908 compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
12909 printf(", regVars=");
12910 printRegMaskInt(regSet.rsMaskVars);
12911 compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
12915 noway_assert(nonVarPtrRegs == 0);
12918 noway_assert(stmt->gtOper == GT_STMT);
12920 genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
12922 } //-------- END-FOR each statement-tree of the current block ---------
12924 if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
12928 /* Is this the last block, and are there any open scopes left ? */
12930 bool isLastBlockProcessed = (block->bbNext == NULL);
12931 if (block->isBBCallAlwaysPair())
12933 isLastBlockProcessed = (block->bbNext->bbNext == NULL);
12936 if (isLastBlockProcessed && siOpenScopeList.scNext)
12938 /* This assert no longer holds, because we may insert a throw
12939 block to demarcate the end of a try or finally region when they
12940 are at the end of the method. It would be nice if we could fix
12941 our code so that this throw block will no longer be necessary. */
12943 // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
12945 siCloseAllOpenScopes();
12949 SubtractStackLevel(savedStkLvl);
12951 gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
12953 if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
12954 compiler->genChangeLife(block->bbLiveOut);
12956 /* Both stacks should always be empty on exit from a basic block */
12958 noway_assert(genStackLevel == 0);
12959 #if FEATURE_STACK_FP_X87
12960 noway_assert(genGetFPstkLevel() == 0);
12962 // Do the FPState matching that may have to be done
12963 genCodeForEndBlockTransitionStackFP(block);
12966 noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
12968 /* Do we need to generate a jump or return? */
12970 switch (block->bbJumpKind)
12973 inst_JMP(EJ_jmp, block->bbJumpDest);
12977 genExitCode(block);
12981 // If we have a throw at the end of a function or funclet, we need to emit another instruction
12982 // afterwards to help the OS unwinder determine the correct context during unwind.
12983 // We insert an unexecuted breakpoint instruction in several situations
12984 // following a throw instruction:
12985 // 1. If the throw is the last instruction of the function or funclet. This helps
12986 // the OS unwinder determine the correct context during an unwind from the
12987 // thrown exception.
12988 // 2. If this is the last block of the hot section.
12989 // 3. If the subsequent block is a special throw block.
12990 if ((block->bbNext == NULL)
12991 #if FEATURE_EH_FUNCLETS
12992 || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
12993 #endif // FEATURE_EH_FUNCLETS
12994 || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
12995 block->bbNext == compiler->fgFirstColdBlock)
12997 instGen(INS_BREAKPOINT); // This should never get executed
13002 case BBJ_CALLFINALLY:
13004 #if defined(_TARGET_X86_)
13006 /* If we are about to invoke a finally locally from a try block,
13007 we have to set the hidden slot corresponding to the finally's
13008 nesting level. When invoked in response to an exception, the
13009 EE usually does it.
13011 We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
13013 This code depends on this order not being messed up.
13020 step: mov [ebp- n ],0
13025 noway_assert(isFramePointerUsed());
13027 // Get the nesting level which contains the finally
13028 compiler->fgGetNestingLevel(block, &finallyNesting);
13030 // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
13031 unsigned filterEndOffsetSlotOffs;
13032 filterEndOffsetSlotOffs =
13033 (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
13035 unsigned curNestingSlotOffs;
13036 curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
13038 // Zero out the slot for the next nesting level
13039 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
13040 curNestingSlotOffs - TARGET_POINTER_SIZE);
13042 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
13043 curNestingSlotOffs);
13045 // Now push the address of where the finally funclet should
13046 // return to directly.
13047 if (!(block->bbFlags & BBF_RETLESS_CALL))
13049 assert(block->isBBCallAlwaysPair());
13050 getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
13054 // EE expects a DWORD, so we give him 0
13055 inst_IV(INS_push_hide, 0);
13058 // Jump to the finally BB
13059 inst_JMP(EJ_jmp, block->bbJumpDest);
13061 #elif defined(_TARGET_ARM_)
13063 // Now set REG_LR to the address of where the finally funclet should
13064 // return to directly.
13066 BasicBlock* bbFinallyRet;
13067 bbFinallyRet = NULL;
13069 // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
13070 // we would have otherwise created retless calls.
13071 assert(block->isBBCallAlwaysPair());
13073 assert(block->bbNext != NULL);
13074 assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
13075 assert(block->bbNext->bbJumpDest != NULL);
13076 assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
13078 bbFinallyRet = block->bbNext->bbJumpDest;
13079 bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
13081 // Load the address where the finally funclet should return into LR.
13082 // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
13084 genMov32RelocatableDisplacement(bbFinallyRet, REG_LR);
13085 regTracker.rsTrackRegTrash(REG_LR);
13087 // Jump to the finally BB
13088 inst_JMP(EJ_jmp, block->bbJumpDest);
13093 // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
13094 // jump target using bbJumpDest - that is already used to point
13095 // to the finally block. So just skip past the BBJ_ALWAYS unless the
13096 // block is RETLESS.
13097 if (!(block->bbFlags & BBF_RETLESS_CALL))
13099 assert(block->isBBCallAlwaysPair());
13102 block = block->bbNext;
13106 #ifdef _TARGET_ARM_
13108 case BBJ_EHCATCHRET:
13109 // set r0 to the address the VM should return to after the catch
13110 genMov32RelocatableDisplacement(block->bbJumpDest, REG_R0);
13111 regTracker.rsTrackRegTrash(REG_R0);
13115 case BBJ_EHFINALLYRET:
13116 case BBJ_EHFILTERRET:
13117 genReserveFuncletEpilog(block);
13120 #else // _TARGET_ARM_
13122 case BBJ_EHFINALLYRET:
13123 case BBJ_EHFILTERRET:
13124 case BBJ_EHCATCHRET:
13127 #endif // _TARGET_ARM_
13135 noway_assert(!"Unexpected bbJumpKind");
13140 compiler->compCurBB = 0;
13143 } //------------------ END-FOR each block of the method -------------------
13145 /* Nothing is live at this point */
13146 genUpdateLife(VarSetOps::MakeEmpty(compiler));
13148 /* Finalize the spill tracking logic */
13150 regSet.rsSpillEnd();
13152 /* Finalize the temp tracking logic */
13154 compiler->tmpEnd();
13157 if (compiler->verbose)
13160 printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
13161 printf("%s\n", compiler->info.compFullName);
13166 #pragma warning(pop)
13169 /*****************************************************************************
13171 * Generate code for a long operation.
13172 * needReg is a recommendation of which registers to use for the tree.
13173 * For partially enregistered longs, the tree will be marked as in a register
13174 * without loading the stack part into a register. Note that only leaf
13175 * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
13176 * enregistered so that we can know the memory location of the other half.
13180 #pragma warning(push)
13181 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
13183 void CodeGen::genCodeForTreeLng(GenTree* tree, regMaskTP needReg, regMaskTP avoidReg)
13188 regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
13193 noway_assert(tree);
13194 noway_assert(tree->gtOper != GT_STMT);
13195 noway_assert(genActualType(tree->gtType) == TYP_LONG);
13197 /* Figure out what kind of a node we have */
13199 oper = tree->OperGet();
13200 kind = tree->OperKind();
13205 regPair = tree->gtRegPair;
13207 gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
13212 /* Is this a constant node? */
13214 if (kind & GTK_CONST)
13218 /* Pick a register pair for the value */
13220 regPair = regSet.rsPickRegPair(needReg);
13222 /* Load the value into the registers */
13223 CLANG_FORMAT_COMMENT_ANCHOR;
13225 #if !CPU_HAS_FP_SUPPORT
13226 if (oper == GT_CNS_DBL)
13228 noway_assert(sizeof(__int64) == sizeof(double));
13230 noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
13232 lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
13237 noway_assert(oper == GT_CNS_LNG);
13239 lval = tree->gtLngCon.gtLconVal;
13242 genSetRegToIcon(genRegPairLo(regPair), int(lval));
13243 genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
13247 /* Is this a leaf node? */
13249 if (kind & GTK_LEAF)
13257 /* This case has to consider the case in which an int64 LCL_VAR
13258 * may both be enregistered and also have a cached copy of itself
13259 * in a different set of registers.
13260 * We want to return the registers that have the most in common
13261 * with the needReg mask
13264 /* Does the var have a copy of itself in the cached registers?
13265 * And are these cached registers both free?
13266 * If so use these registers if they match any needReg.
13269 regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
13271 if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
13272 ((genRegPairMask(regPair) & needReg) != RBM_NONE))
13277 /* Does the variable live in a register?
13278 * If so use these registers.
13280 if (genMarkLclVar(tree))
13283 /* If tree is not an enregistered variable then
13284 * be sure to use any cached register that contain
13285 * a copy of this local variable
13287 if (regPair != REG_PAIR_NONE)
13296 // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
13297 // to worry about it being enregistered.
13298 noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
13304 /* Pick a register pair for the value */
13306 regPair = regSet.rsPickRegPair(needReg);
13308 /* Load the value into the registers */
13310 instruction loadIns;
13312 loadIns = ins_Load(TYP_INT); // INS_ldr
13313 regLo = genRegPairLo(regPair);
13314 regHi = genRegPairHi(regPair);
13316 #if CPU_LOAD_STORE_ARCH
13318 regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
13319 inst_RV_TT(INS_lea, regAddr, tree, 0);
13320 regTracker.rsTrackRegTrash(regAddr);
13322 if (regLo != regAddr)
13324 // assert(regLo != regAddr); // forced by if statement
13325 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13326 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13330 // assert(regHi != regAddr); // implied by regpair property and the if statement
13331 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
13332 getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
13336 inst_RV_TT(loadIns, regLo, tree, 0);
13337 inst_RV_TT(loadIns, regHi, tree, 4);
13340 #ifdef _TARGET_ARM_
13341 if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
13343 // Emit a memory barrier instruction after the load
13344 instGen_MemoryBarrier();
13348 regTracker.rsTrackRegTrash(regLo);
13349 regTracker.rsTrackRegTrash(regHi);
13355 compiler->gtDispTree(tree);
13357 noway_assert(!"unexpected leaf");
13361 /* Is it a 'simple' unary/binary operator? */
13363 if (kind & GTK_SMPOP)
13369 bool setCarry = false;
13372 GenTree* op1 = tree->gtOp.gtOp1;
13373 GenTree* op2 = tree->gtGetOp2IfPresent();
13379 unsigned lclVarNum = compiler->lvaCount;
13380 unsigned lclVarILoffs = DUMMY_INIT(0);
13382 /* Is the target a local ? */
13384 if (op1->gtOper == GT_LCL_VAR)
13386 unsigned varNum = op1->gtLclVarCommon.gtLclNum;
13389 noway_assert(varNum < compiler->lvaCount);
13390 varDsc = compiler->lvaTable + varNum;
13392 // No dead stores, (with min opts we may have dead stores)
13393 noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
13395 /* For non-debuggable code, every definition of a lcl-var has
13396 * to be checked to see if we need to open a new scope for it.
13397 * Remember the local var info to call siCheckVarScope
13398 * AFTER codegen of the assignment.
13400 if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
13401 (compiler->info.compVarScopesCount > 0))
13403 lclVarNum = varNum;
13404 lclVarILoffs = op1->gtLclVar.gtLclILoffs;
13407 /* Has the variable been assigned to a register (pair) ? */
13409 if (genMarkLclVar(op1))
13411 noway_assert(op1->InReg());
13412 regPair = op1->gtRegPair;
13413 regLo = genRegPairLo(regPair);
13414 regHi = genRegPairHi(regPair);
13415 noway_assert(regLo != regHi);
13417 /* Is the value being assigned a constant? */
13419 if (op2->gtOper == GT_CNS_LNG)
13421 /* Move the value into the target */
13423 genMakeRegPairAvailable(regPair);
13426 if (regLo == REG_STK)
13428 ins = ins_Store(TYP_INT);
13432 // Always do the stack first (in case it grabs a register it can't
13433 // clobber regLo this way)
13434 if (regHi == REG_STK)
13436 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13440 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
13442 // The REG_STK case has already been handled
13443 if (regHi != REG_STK)
13446 inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13449 goto DONE_ASSG_REGS;
13452 /* Compute the RHS into desired register pair */
13454 if (regHi != REG_STK)
13456 genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
13457 noway_assert(op2->InReg());
13458 noway_assert(op2->gtRegPair == regPair);
13466 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
13468 noway_assert(op2->InReg());
13470 curPair = op2->gtRegPair;
13471 curLo = genRegPairLo(curPair);
13472 curHi = genRegPairHi(curPair);
13474 /* move high first, target is on stack */
13475 inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
13477 if (regLo != curLo)
13479 if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
13480 regSet.rsSpillReg(regLo);
13481 inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
13482 regTracker.rsTrackRegCopy(regLo, curLo);
13486 genReleaseRegPair(op2);
13487 goto DONE_ASSG_REGS;
13491 /* Is the value being assigned a constant? */
13493 if (op2->gtOper == GT_CNS_LNG)
13495 /* Make the target addressable */
13497 addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
13499 /* Move the value into the target */
13501 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
13502 inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
13504 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13510 /* Catch a case where can avoid generating op reg, mem. Better pairing
13515 * To avoid problems with order of evaluation, only do this if op2 is
13516 * a non-enregistered local variable
13519 if (GenTree::OperIsCommutative(oper) &&
13520 op1->gtOper == GT_LCL_VAR &&
13521 op2->gtOper == GT_LCL_VAR)
13523 regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
13525 /* Is op2 a non-enregistered local variable? */
13526 if (regPair == REG_PAIR_NONE)
13528 regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
13530 /* Is op1 an enregistered local variable? */
13531 if (regPair != REG_PAIR_NONE)
13533 /* Swap the operands */
13542 /* Eliminate worthless assignment "lcl = lcl" */
13544 if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
13545 op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
13547 genUpdateLife(op2);
13551 if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
13552 // op1,op2 need to be materialized in the correct order.
13553 (tree->gtFlags & GTF_REVERSE_OPS))
13555 /* Generate the small RHS into a register pair */
13557 GenTree* smallOpr = op2->gtOp.gtOp1;
13559 genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
13561 /* Make the target addressable */
13563 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
13565 /* Make sure everything is still addressable */
13567 genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
13568 noway_assert(smallOpr->InReg());
13569 regHi = smallOpr->gtRegNum;
13570 addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
13572 // conv.ovf.u8 could overflow if the original number was negative
13573 if (op2->gtOverflow())
13575 noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
13576 0); // conv.ovf.u8.un should be bashed to conv.u8.un
13577 instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
13578 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
13579 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
13582 /* Move the value into the target */
13584 inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
13585 inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
13587 /* Free up anything that was tied up by either side */
13589 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13590 genReleaseReg(smallOpr);
13593 if (op1->gtOper == GT_LCL_VAR)
13595 /* clear this local from reg table */
13596 regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13598 /* mark RHS registers as containing the local var */
13599 regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
13605 /* Is the LHS more complex than the RHS? */
13607 if (tree->gtFlags & GTF_REVERSE_OPS)
13609 /* Generate the RHS into a register pair */
13611 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
13612 noway_assert(op2->InReg());
13614 /* Make the target addressable */
13615 op1 = genCodeForCommaTree(op1);
13616 addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
13618 /* Make sure the RHS register hasn't been spilled */
13620 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
13624 /* Make the target addressable */
13626 op1 = genCodeForCommaTree(op1);
13627 addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
13629 /* Generate the RHS into a register pair */
13631 genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
13634 /* Lock 'op2' and make sure 'op1' is still addressable */
13636 noway_assert(op2->InReg());
13637 regPair = op2->gtRegPair;
13639 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13641 /* Move the value into the target */
13643 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
13644 inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
13646 /* Free up anything that was tied up by either side */
13648 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13649 genReleaseRegPair(op2);
13655 if (op1->gtOper == GT_LCL_VAR)
13657 /* Clear this local from reg table */
13659 regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
13661 if ((op2->InReg()) &&
13662 /* constant has precedence over local */
13663 // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
13664 tree->gtOper == GT_ASG)
13668 /* mark RHS registers as containing the local var */
13670 regNo = genRegPairLo(op2->gtRegPair);
13671 if (regNo != REG_STK)
13672 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
13674 regNo = genRegPairHi(op2->gtRegPair);
13675 if (regNo != REG_STK)
13677 /* For partially enregistered longs, we might have
13678 stomped on op2's hiReg */
13679 if (!(op1->InReg()) || regNo != genRegPairLo(op1->gtRegPair))
13681 regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
13690 genUpdateLife(op1);
13691 genUpdateLife(tree);
13693 /* For non-debuggable code, every definition of a lcl-var has
13694 * to be checked to see if we need to open a new scope for it.
13696 if (lclVarNum < compiler->lvaCount)
13697 siCheckVarScope(lclVarNum, lclVarILoffs);
13715 ovfl = tree->gtOverflow();
13719 insLo = insHi = INS_AND;
13722 insLo = insHi = INS_OR;
13725 insLo = insHi = INS_XOR;
13734 /* The following makes an assumption about gtSetEvalOrder(this) */
13736 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
13738 /* Special case: check for "(long(intval) << 32) | longval" */
13740 if (oper == GT_OR && op1->gtOper == GT_LSH)
13742 GenTree* lshLHS = op1->gtOp.gtOp1;
13743 GenTree* lshRHS = op1->gtOp.gtOp2;
13745 if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
13746 genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
13749 /* Throw away the cast of the shift operand. */
13751 op1 = lshLHS->gtCast.CastOp();
13753 /* Special case: check op2 for "ulong(intval)" */
13754 if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
13755 genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
13757 /* Throw away the cast of the second operand. */
13759 op2 = op2->gtCast.CastOp();
13760 goto SIMPLE_OR_LONG;
13762 /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
13763 else if (op2->gtOper == GT_AND)
13766 andLHS = op2->gtOp.gtOp1;
13768 andRHS = op2->gtOp.gtOp2;
13770 if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
13771 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
13772 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
13774 /* Throw away the cast of the second operand. */
13776 op2 = andLHS->gtCast.CastOp();
13779 // Load the high DWORD, ie. op1
13781 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
13783 noway_assert(op1->InReg());
13784 regHi = op1->gtRegNum;
13785 regSet.rsMarkRegUsed(op1);
13787 // Load the low DWORD, ie. op2
13789 genCodeForTree(op2, needReg & ~genRegMask(regHi));
13791 noway_assert(op2->InReg());
13792 regLo = op2->gtRegNum;
13794 /* Make sure regHi is still around. Also, force
13795 regLo to be excluded in case regLo==regHi */
13797 genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
13798 regHi = op1->gtRegNum;
13800 regPair = gen2regs2pair(regLo, regHi);
13805 /* Generate the following sequence:
13806 Prepare op1 (discarding shift)
13807 Compute op2 into some regpair
13811 /* First, make op1 addressable */
13813 /* tempReg must avoid both needReg, op2->RsvdRegs and regSet.rsMaskResvd.
13815 It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
13816 which the long value is computed is from needReg. But at this point the safest fix is
13817 to exclude regSet.rsMaskResvd.
13819 Note that needReg could be the set of free registers (excluding reserved ones). If we don't
13820 exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a
13822 reserved set which is bound to fail. To prevent that we avoid regSet.rsMaskResvd.
13824 regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
13826 addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
13828 genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
13830 noway_assert(op2->InReg());
13831 regPair = op2->gtRegPair;
13832 regHi = genRegPairHi(regPair);
13834 /* The operand might have interfered with the address */
13836 addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
13838 /* Now compute the result */
13840 inst_RV_TT(insHi, regHi, op1, 0);
13842 regTracker.rsTrackRegTrash(regHi);
13844 /* Free up anything that was tied up by the LHS */
13846 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
13848 /* The result is where the second operand is sitting */
13850 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
13852 regPair = op2->gtRegPair;
13857 /* Special case: check for "longval | (long(intval) << 32)" */
13859 if (oper == GT_OR && op2->gtOper == GT_LSH)
13861 GenTree* lshLHS = op2->gtOp.gtOp1;
13862 GenTree* lshRHS = op2->gtOp.gtOp2;
13864 if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
13865 genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
13868 /* We throw away the cast of the shift operand. */
13870 op2 = lshLHS->gtCast.CastOp();
13872 /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
13874 if (op1->gtOper == GT_AND)
13876 GenTree* andLHS = op1->gtOp.gtOp1;
13877 GenTree* andRHS = op1->gtOp.gtOp2;
13879 if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
13880 andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
13881 genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
13883 /* Throw away the cast of the first operand. */
13885 op1 = andLHS->gtCast.CastOp();
13887 // Load the low DWORD, ie. op1
13889 genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
13891 noway_assert(op1->InReg());
13892 regLo = op1->gtRegNum;
13893 regSet.rsMarkRegUsed(op1);
13895 // Load the high DWORD, ie. op2
13897 genCodeForTree(op2, needReg & ~genRegMask(regLo));
13899 noway_assert(op2->InReg());
13900 regHi = op2->gtRegNum;
13902 /* Make sure regLo is still around. Also, force
13903 regHi to be excluded in case regLo==regHi */
13905 genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
13906 regLo = op1->gtRegNum;
13908 regPair = gen2regs2pair(regLo, regHi);
13913 /* Generate the following sequence:
13914 Compute op1 into some regpair
13915 Make op2 (ignoring shift) addressable
13919 // First, generate the first operand into some register
13921 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13922 noway_assert(op1->InReg());
13924 /* Make the second operand addressable */
13926 addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
13928 /* Make sure the result is in a free register pair */
13930 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
13931 regPair = op1->gtRegPair;
13932 regHi = genRegPairHi(regPair);
13934 /* The operand might have interfered with the address */
13936 addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
13938 /* Compute the new value */
13940 inst_RV_TT(insHi, regHi, op2, 0);
13942 /* The value in the high register has been trashed */
13944 regTracker.rsTrackRegTrash(regHi);
13950 /* Generate the first operand into registers */
13952 if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
13953 ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
13955 regPair = regSet.rsPickRegPair(needReg);
13956 genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13960 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
13962 noway_assert(op1->InReg());
13964 regPair = op1->gtRegPair;
13965 op1Mask = genRegPairMask(regPair);
13967 /* Make the second operand addressable */
13968 regMaskTP needReg2;
13969 needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
13970 addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
13972 // TODO: If 'op1' got spilled and 'op2' happens to be
13973 // TODO: in a register, and we have add/mul/and/or/xor,
13974 // TODO: reverse the operands since we can perform the
13975 // TODO: operation directly with the spill temp, e.g.
13976 // TODO: 'add regHi, [temp]'.
13978 /* Make sure the result is in a free register pair */
13980 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
13981 regPair = op1->gtRegPair;
13982 op1Mask = genRegPairMask(regPair);
13984 regLo = genRegPairLo(regPair);
13985 regHi = genRegPairHi(regPair);
13987 /* Make sure that we don't spill regLo/regHi below */
13988 regSet.rsLockUsedReg(op1Mask);
13990 /* The operand might have interfered with the address */
13992 addrReg = genKeepAddressable(op2, addrReg);
13994 /* The value in the register pair is about to be trashed */
13996 regTracker.rsTrackRegTrash(regLo);
13997 regTracker.rsTrackRegTrash(regHi);
13999 /* Compute the new value */
14004 if (op2->gtOper == GT_CNS_LNG)
14006 __int64 icon = op2->gtLngCon.gtLconVal;
14008 /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
14013 if ((int)(icon) == -1)
14015 if ((int)(icon >> 32) == -1)
14018 if (!(icon & I64(0x00000000FFFFFFFF)))
14020 genSetRegToIcon(regLo, 0);
14024 if (!(icon & I64(0xFFFFFFFF00000000)))
14026 /* Just to always set low first*/
14030 inst_RV_TT(insLo, regLo, op2, 0);
14033 genSetRegToIcon(regHi, 0);
14041 if (!(icon & I64(0x00000000FFFFFFFF)))
14043 if (!(icon & I64(0xFFFFFFFF00000000)))
14051 // Fix 383813 X86/ARM ILGEN
14052 // Fix 383793 ARM ILGEN
14053 // Fix 383911 ARM ILGEN
14055 newMask = addrReg & ~op1Mask;
14056 regSet.rsLockUsedReg(newMask);
14060 insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14061 inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
14065 insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
14066 inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
14069 regSet.rsUnlockUsedReg(newMask);
14070 regSet.rsUnlockUsedReg(op1Mask);
14074 /* Free up anything that was tied up by the second operand (op2) */
14076 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
14078 /* The result is where the first operand is sitting */
14080 genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
14082 regPair = op1->gtRegPair;
14085 genCheckOverflow(tree);
14091 regPair = genCodeForLongModInt(tree, needReg);
14096 /* Special case: both operands promoted from int */
14098 assert(tree->gtIsValid64RsltMul());
14100 /* Change to an integer multiply temporarily */
14102 tree->gtType = TYP_INT;
14104 noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
14105 tree->gtOp.gtOp1 = op1->gtCast.CastOp();
14106 tree->gtOp.gtOp2 = op2->gtCast.CastOp();
14108 assert(tree->gtFlags & GTF_MUL_64RSLT);
14110 #if defined(_TARGET_X86_)
14111 // imul on x86 requires EDX:EAX
14112 genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
14113 noway_assert(tree->InReg());
14114 noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
14115 #elif defined(_TARGET_ARM_)
14116 genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
14117 noway_assert(tree->InReg());
14119 assert(!"Unsupported target for 64-bit multiply codegen");
14122 /* Restore gtType, op1 and op2 from the change above */
14124 tree->gtType = TYP_LONG;
14125 tree->gtOp.gtOp1 = op1;
14126 tree->gtOp.gtOp2 = op2;
14128 #if defined(_TARGET_X86_)
14129 /* The result is now in EDX:EAX */
14130 regPair = REG_PAIR_EAXEDX;
14131 #elif defined(_TARGET_ARM_)
14132 regPair = tree->gtRegPair;
14137 helper = CORINFO_HELP_LLSH;
14140 helper = CORINFO_HELP_LRSH;
14143 helper = CORINFO_HELP_LRSZ;
14148 noway_assert(op1->gtType == TYP_LONG);
14149 noway_assert(genActualType(op2->gtType) == TYP_INT);
14151 /* Is the second operand a constant? */
14153 if (op2->gtOper == GT_CNS_INT)
14155 unsigned int count = op2->gtIntCon.gtIconVal;
14157 /* Compute the left operand into a free register pair */
14159 genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
14160 noway_assert(op1->InReg());
14162 regPair = op1->gtRegPair;
14163 regLo = genRegPairLo(regPair);
14164 regHi = genRegPairHi(regPair);
14166 /* Assume the value in the register pair is trashed. In some cases, though,
14167 a register might be set to zero, and we can use that information to improve
14168 some code generation.
14171 regTracker.rsTrackRegTrash(regLo);
14172 regTracker.rsTrackRegTrash(regHi);
14174 /* Generate the appropriate shift instructions */
14181 // regHi, regLo are correct
14183 else if (count < 32)
14185 #if defined(_TARGET_XARCH_)
14186 inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
14187 #elif defined(_TARGET_ARM_)
14188 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
14189 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
14190 INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
14193 #endif // _TARGET_*
14194 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
14196 else // count >= 32
14198 assert(count >= 32);
14201 #if defined(_TARGET_ARM_)
14204 // mov low dword into high dword (i.e. shift left by 32-bits)
14205 inst_RV_RV(INS_mov, regHi, regLo);
14209 assert(count > 32 && count < 64);
14210 getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
14214 // mov low dword into high dword (i.e. shift left by 32-bits)
14215 inst_RV_RV(INS_mov, regHi, regLo);
14218 // Shift high dword left by count - 32
14219 inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
14221 #endif // _TARGET_*
14223 else // count >= 64
14225 assert(count >= 64);
14226 genSetRegToIcon(regHi, 0);
14228 genSetRegToIcon(regLo, 0);
14235 // regHi, regLo are correct
14237 else if (count < 32)
14239 #if defined(_TARGET_XARCH_)
14240 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14241 #elif defined(_TARGET_ARM_)
14242 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14243 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
14244 INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14247 #endif // _TARGET_*
14248 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
14250 else // count >= 32
14252 assert(count >= 32);
14255 #if defined(_TARGET_ARM_)
14258 // mov high dword into low dword (i.e. shift right by 32-bits)
14259 inst_RV_RV(INS_mov, regLo, regHi);
14263 assert(count > 32 && count < 64);
14264 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
14268 // mov high dword into low dword (i.e. shift right by 32-bits)
14269 inst_RV_RV(INS_mov, regLo, regHi);
14272 // Shift low dword right by count - 32
14273 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
14275 #endif // _TARGET_*
14278 // Propagate sign bit in high dword
14279 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14283 // Propagate the sign from the high dword
14284 inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
14292 // regHi, regLo are correct
14294 else if (count < 32)
14296 #if defined(_TARGET_XARCH_)
14297 inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
14298 #elif defined(_TARGET_ARM_)
14299 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
14300 getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
14301 INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
14304 #endif // _TARGET_*
14305 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
14307 else // count >= 32
14309 assert(count >= 32);
14312 #if defined(_TARGET_ARM_)
14315 // mov high dword into low dword (i.e. shift right by 32-bits)
14316 inst_RV_RV(INS_mov, regLo, regHi);
14320 assert(count > 32 && count < 64);
14321 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
14325 // mov high dword into low dword (i.e. shift right by 32-bits)
14326 inst_RV_RV(INS_mov, regLo, regHi);
14329 // Shift low dword right by count - 32
14330 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
14332 #endif // _TARGET_*
14334 else // count >= 64
14336 assert(count >= 64);
14337 genSetRegToIcon(regLo, 0);
14339 genSetRegToIcon(regHi, 0);
14344 noway_assert(!"Illegal oper for long shift");
14351 /* Which operand are we supposed to compute first? */
14353 assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
14355 if (tree->gtFlags & GTF_REVERSE_OPS)
14357 /* The second operand can't be a constant */
14359 noway_assert(op2->gtOper != GT_CNS_INT);
14361 /* Load the shift count, hopefully into RBM_SHIFT */
14362 RegSet::ExactReg exactReg;
14363 if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
14364 exactReg = RegSet::EXACT_REG;
14366 exactReg = RegSet::ANY_REG;
14367 genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
14369 /* Compute the left operand into REG_LNGARG_0 */
14371 genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14372 noway_assert(op1->InReg());
14374 /* Lock op1 so that it doesn't get trashed */
14376 regSet.rsLockUsedReg(RBM_LNGARG_0);
14378 /* Make sure the shift count wasn't displaced */
14380 genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
14384 regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14388 /* Compute the left operand into REG_LNGARG_0 */
14390 genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
14391 noway_assert(op1->InReg());
14393 /* Compute the shift count into RBM_SHIFT */
14395 genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
14399 regSet.rsLockUsedReg(RBM_SHIFT_LNG);
14401 /* Make sure the value hasn't been displaced */
14403 genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
14405 /* Lock op1 so that it doesn't get trashed */
14407 regSet.rsLockUsedReg(RBM_LNGARG_0);
14410 #ifndef _TARGET_X86_
14411 /* The generic helper is a C-routine and so it follows the full ABI */
14413 /* Spill any callee-saved registers which are being used */
14414 regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
14416 /* But do not spill our argument registers. */
14417 spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14421 regSet.rsSpillRegs(spillRegs);
14424 #endif // !_TARGET_X86_
14426 /* Perform the shift by calling a helper function */
14428 noway_assert(op1->gtRegPair == REG_LNGARG_0);
14429 noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
14430 noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
14432 genEmitHelperCall(helper,
14434 EA_8BYTE); // retSize
14436 #ifdef _TARGET_X86_
14437 /* The value in the register pair is trashed */
14439 regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
14440 regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
14441 #else // _TARGET_X86_
14442 /* The generic helper is a C-routine and so it follows the full ABI */
14443 regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
14444 #endif // _TARGET_X86_
14446 /* Release both operands */
14448 regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
14449 genReleaseRegPair(op1);
14450 genReleaseReg(op2);
14454 noway_assert(op1->InReg());
14455 regPair = op1->gtRegPair;
14461 /* Generate the operand into some register pair */
14463 genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
14464 noway_assert(op1->InReg());
14466 regPair = op1->gtRegPair;
14468 /* Figure out which registers the value is in */
14470 regLo = genRegPairLo(regPair);
14471 regHi = genRegPairHi(regPair);
14473 /* The value in the register pair is about to be trashed */
14475 regTracker.rsTrackRegTrash(regLo);
14476 regTracker.rsTrackRegTrash(regHi);
14478 /* Unary "neg": negate the value in the register pair */
14479 if (oper == GT_NEG)
14481 #ifdef _TARGET_ARM_
14483 // ARM doesn't have an opcode that sets the carry bit like
14484 // x86, so we can't use neg/addc/neg. Instead we use subtract
14485 // with carry. Too bad this uses an extra register.
14487 // Lock regLo and regHi so we don't pick them, and then pick
14488 // a third register to be our 0.
14489 regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
14490 regSet.rsLockReg(regPairMask);
14491 regMaskTP regBest = RBM_ALLINT & ~avoidReg;
14492 regNumber regZero = genGetRegSetToIcon(0, regBest);
14493 regSet.rsUnlockReg(regPairMask);
14495 inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
14496 getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
14498 #elif defined(_TARGET_XARCH_)
14500 inst_RV(INS_NEG, regLo, TYP_LONG);
14501 inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
14502 inst_RV(INS_NEG, regHi, TYP_LONG);
14504 NYI("GT_NEG on TYP_LONG");
14509 /* Unary "not": flip all the bits in the register pair */
14511 inst_RV(INS_NOT, regLo, TYP_LONG);
14512 inst_RV(INS_NOT, regHi, TYP_LONG);
14523 regMaskTP availMask = RBM_ALLINT & ~needReg;
14525 /* Make sure the operand is addressable */
14527 addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
14529 GenTree* addr = oper == GT_IND ? op1 : tree;
14531 /* Pick a register for the value */
14533 regPair = regSet.rsPickRegPair(needReg);
14534 tmpMask = genRegPairMask(regPair);
14536 /* Is there any overlap between the register pair and the address? */
14540 if (tmpMask & addrReg)
14542 /* Does one or both of the target registers overlap? */
14544 if ((tmpMask & addrReg) != tmpMask)
14546 /* Only one register overlaps */
14548 noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
14550 /* If the low register overlaps, load the upper half first */
14552 if (addrReg & genRegMask(genRegPairLo(regPair)))
14559 /* The register completely overlaps with the address */
14561 noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
14563 /* Can we pick another pair easily? */
14565 regFree = regSet.rsRegMaskFree() & ~addrReg;
14567 regFree &= needReg;
14569 /* More than one free register available? */
14571 if (regFree && !genMaxOneBit(regFree))
14573 regPair = regSet.rsPickRegPair(regFree);
14574 tmpMask = genRegPairMask(regPair);
14578 // printf("Overlap: needReg = %08X\n", needReg);
14580 // Reg-prediction won't allow this
14581 noway_assert((regSet.rsMaskVars & addrReg) == 0);
14583 // Grab one fresh reg, and use any one of addrReg
14585 if (regFree) // Try to follow 'needReg'
14586 regLo = regSet.rsGrabReg(regFree);
14587 else // Pick any reg besides addrReg
14588 regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
14590 unsigned regBit = 0x1;
14593 for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
14595 // Found one of addrReg. Use it.
14596 if (regBit & addrReg)
14599 noway_assert(genIsValidReg(regNo)); // Should have found regNo
14601 regPair = gen2regs2pair(regLo, regNo);
14602 tmpMask = genRegPairMask(regPair);
14607 /* Make sure the value is still addressable */
14609 noway_assert(genStillAddressable(tree));
14611 /* Figure out which registers the value is in */
14613 regLo = genRegPairLo(regPair);
14614 regHi = genRegPairHi(regPair);
14616 /* The value in the register pair is about to be trashed */
14618 regTracker.rsTrackRegTrash(regLo);
14619 regTracker.rsTrackRegTrash(regHi);
14621 /* Load the target registers from where the value is */
14625 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14626 regSet.rsLockReg(genRegMask(regHi));
14627 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14628 regSet.rsUnlockReg(genRegMask(regHi));
14632 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
14633 regSet.rsLockReg(genRegMask(regLo));
14634 inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
14635 regSet.rsUnlockReg(genRegMask(regLo));
14638 #ifdef _TARGET_ARM_
14639 if (tree->gtFlags & GTF_IND_VOLATILE)
14641 // Emit a memory barrier instruction after the load
14642 instGen_MemoryBarrier();
14646 genUpdateLife(tree);
14647 genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
14653 /* What are we casting from? */
14655 switch (op1->gtType)
14665 regMaskTP hiRegMask;
14666 regMaskTP loRegMask;
14668 // For an unsigned cast we don't need to sign-extend the 32 bit value
14669 if (tree->gtFlags & GTF_UNSIGNED)
14671 // Does needReg have exactly two bits on and thus
14672 // specifies the exact register pair that we want to use
14673 if (!genMaxOneBit(needReg))
14675 regPair = regSet.rsFindRegPairNo(needReg);
14676 if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
14677 goto ANY_FREE_REG_UNSIGNED;
14678 loRegMask = genRegMask(genRegPairLo(regPair));
14679 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14680 goto ANY_FREE_REG_UNSIGNED;
14681 hiRegMask = genRegMask(genRegPairHi(regPair));
14685 ANY_FREE_REG_UNSIGNED:
14686 loRegMask = needReg;
14687 hiRegMask = needReg;
14690 genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
14691 noway_assert(op1->InReg());
14693 regLo = op1->gtRegNum;
14694 loRegMask = genRegMask(regLo);
14695 regSet.rsLockUsedReg(loRegMask);
14696 regHi = regSet.rsPickReg(hiRegMask);
14697 regSet.rsUnlockUsedReg(loRegMask);
14699 regPair = gen2regs2pair(regLo, regHi);
14701 // Move 0 to the higher word of the ULong
14702 genSetRegToIcon(regHi, 0, TYP_INT);
14704 /* We can now free up the operand */
14705 genReleaseReg(op1);
14709 #ifdef _TARGET_XARCH_
14710 /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
14711 and we need the result to be in those registers.
14712 cdq is smaller so we use it for SMALL_CODE
14715 if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
14716 (regSet.rsRegMaskFree() & RBM_EDX))
14718 genCodeForTree(op1, RBM_EAX);
14719 regSet.rsMarkRegUsed(op1);
14721 /* If we have to spill EDX, might as well use the faster
14722 sar as the spill will increase code size anyway */
14724 if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
14726 hiRegMask = regSet.rsRegMaskFree();
14727 goto USE_SAR_FOR_CAST;
14730 regSet.rsGrabReg(RBM_EDX);
14731 regTracker.rsTrackRegTrash(REG_EDX);
14733 /* Convert the int in EAX into a long in EDX:EAX */
14737 /* The result is in EDX:EAX */
14739 regPair = REG_PAIR_EAXEDX;
14744 /* use the sar instruction to sign-extend a 32-bit integer */
14746 // Does needReg have exactly two bits on and thus
14747 // specifies the exact register pair that we want to use
14748 if (!genMaxOneBit(needReg))
14750 regPair = regSet.rsFindRegPairNo(needReg);
14751 if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
14752 goto ANY_FREE_REG_SIGNED;
14753 loRegMask = genRegMask(genRegPairLo(regPair));
14754 if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
14755 goto ANY_FREE_REG_SIGNED;
14756 hiRegMask = genRegMask(genRegPairHi(regPair));
14760 ANY_FREE_REG_SIGNED:
14761 loRegMask = needReg;
14762 hiRegMask = RBM_NONE;
14765 genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
14766 #ifdef _TARGET_XARCH_
14769 noway_assert(op1->InReg());
14771 regLo = op1->gtRegNum;
14772 loRegMask = genRegMask(regLo);
14773 regSet.rsLockUsedReg(loRegMask);
14774 regHi = regSet.rsPickReg(hiRegMask);
14775 regSet.rsUnlockUsedReg(loRegMask);
14777 regPair = gen2regs2pair(regLo, regHi);
14779 #ifdef _TARGET_ARM_
14780 /* Copy the lo32 bits from regLo to regHi and sign-extend it */
14781 // Use one instruction instead of two
14782 getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
14784 /* Copy the lo32 bits from regLo to regHi and sign-extend it */
14785 inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
14786 inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
14789 /* The value in the upper register is trashed */
14791 regTracker.rsTrackRegTrash(regHi);
14794 /* We can now free up the operand */
14795 genReleaseReg(op1);
14797 // conv.ovf.u8 could overflow if the original number was negative
14798 if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
14800 regNumber hiReg = genRegPairHi(regPair);
14801 instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
14802 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
14803 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
14812 /* Load the FP value onto the coprocessor stack */
14814 genCodeForTreeFlt(op1);
14816 /* Allocate a temp for the long value */
14818 temp = compiler->tmpGetTemp(TYP_LONG);
14820 /* Store the FP value into the temp */
14822 inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
14825 /* Pick a register pair for the value */
14827 regPair = regSet.rsPickRegPair(needReg);
14829 /* Figure out which registers the value is in */
14831 regLo = genRegPairLo(regPair);
14832 regHi = genRegPairHi(regPair);
14834 /* The value in the register pair is about to be trashed */
14836 regTracker.rsTrackRegTrash(regLo);
14837 regTracker.rsTrackRegTrash(regHi);
14839 /* Load the converted value into the registers */
14841 inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
14842 inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
14844 /* We no longer need the temp */
14846 compiler->tmpRlsTemp(temp);
14849 NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
14855 noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
14857 genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
14858 regPair = op1->gtRegPair;
14860 // Do we need to set the sign-flag, or can we checked if it is set?
14861 // and not do this "test" if so.
14865 regNumber hiReg = genRegPairHi(op1->gtRegPair);
14866 noway_assert(hiReg != REG_STK);
14867 instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
14871 inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
14874 emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
14875 genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
14881 compiler->gtDispTree(tree);
14883 NO_WAY("unexpected cast to long");
14890 * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
14891 * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
14892 * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
14895 // TODO: this should be done AFTER we called exit mon so that
14896 // we are sure that we don't have to keep 'this' alive
14898 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
14900 /* either it's an "empty" statement or the return statement
14901 of a synchronized method
14904 genPInvokeMethodEpilog();
14907 #if CPU_LONG_USES_REGPAIR
14908 /* There must be a long return value */
14912 /* Evaluate the return value into EDX:EAX */
14914 genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
14916 noway_assert(op1->InReg());
14917 noway_assert(op1->gtRegPair == REG_LNGRET);
14920 NYI("64-bit return");
14923 #ifdef PROFILING_SUPPORTED
14924 // The profiling hook does not trash registers, so it's safe to call after we emit the code for
14925 // the GT_RETURN tree.
14927 if (compiler->compCurBB == compiler->genReturnBB)
14929 genProfilingLeaveCallback();
14935 noway_assert(!"inliner-generated ?: for longs NYI");
14936 NO_WAY("inliner-generated ?: for longs NYI");
14941 if (tree->gtFlags & GTF_REVERSE_OPS)
14944 genCodeForTreeLng(op2, needReg, avoidReg);
14945 genUpdateLife(op2);
14947 noway_assert(op2->InReg());
14949 regSet.rsMarkRegPairUsed(op2);
14951 // Do side effects of op1
14952 genEvalSideEffects(op1);
14954 // Recover op2 if spilled
14955 genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
14957 genReleaseRegPair(op2);
14959 genUpdateLife(tree);
14961 regPair = op2->gtRegPair;
14965 noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
14967 /* Generate side effects of the first operand */
14969 genEvalSideEffects(op1);
14970 genUpdateLife(op1);
14972 /* Is the value of the second operand used? */
14974 if (tree->gtType == TYP_VOID)
14976 /* The right operand produces no result */
14978 genEvalSideEffects(op2);
14979 genUpdateLife(tree);
14983 /* Generate the second operand, i.e. the 'real' value */
14985 genCodeForTreeLng(op2, needReg, avoidReg);
14987 /* The result of 'op2' is also the final result */
14989 regPair = op2->gtRegPair;
14996 /* Generate the operand, i.e. the 'real' value */
14998 genCodeForTreeLng(op1, needReg, avoidReg);
15000 /* The result of 'op1' is also the final result */
15002 regPair = op1->gtRegPair;
15011 genCodeForTreeLng(op1, needReg, avoidReg);
15012 regPair = op1->gtRegPair;
15020 compiler->gtDispTree(tree);
15022 noway_assert(!"unexpected 64-bit operator");
15025 /* See what kind of a special operator we have here */
15031 retMask = genCodeForCall(tree->AsCall(), true);
15032 if (retMask == RBM_NONE)
15033 regPair = REG_PAIR_NONE;
15035 regPair = regSet.rsFindRegPairNo(retMask);
15040 compiler->gtDispTree(tree);
15042 NO_WAY("unexpected long operator");
15047 genUpdateLife(tree);
15049 /* Here we've computed the value of 'tree' into 'regPair' */
15051 noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
15053 genMarkTreeInRegPair(tree, regPair);
15056 #pragma warning(pop)
15059 /*****************************************************************************
15061 * Generate code for a mod of a long by an int.
// Generate code for an unsigned modulo (GT_UMOD) of a 64-bit value by a small
// 32-bit constant divisor (2..0x3fffffff). x86-only: uses the 32-bit DIV
// instruction twice to avoid the #DE overflow that a single 64/32 divide could
// raise when the high dword of the dividend is >= the divisor.
// Returns the register pair holding the result; callers rely on
// GTF_MOD_INT_RESULT to decide whether the result is left un-normalized.
15064 regPairNo CodeGen::genCodeForLongModInt(GenTree* tree, regMaskTP needReg)
15066 #ifdef _TARGET_X86_
15071 genTreeOps oper = tree->OperGet();
15072 GenTree* op1 = tree->gtOp.gtOp1;
15073 GenTree* op2 = tree->gtOp.gtOp2;
15075 /* Codegen only for Unsigned MOD */
15076 noway_assert(oper == GT_UMOD);
15078 /* op2 must be a long constant in the range 2 to 0x3fffffff */
15080 noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
15081 (op2->gtLngCon.gtLconVal <= 0x3fffffff));
15082 int val = (int)op2->gtLngCon.gtLconVal;
// The divisor fits in 32 bits, so retype the constant node in place as a
// TYP_INT constant; all subsequent addressing/compute helpers treat it as int.
15084 op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
15086 op2->gtType = TYP_INT;
15087 op2->gtIntCon.gtIconVal = val;
15089 /* Which operand are we supposed to compute first? */
15091 if (tree->gtFlags & GTF_REVERSE_OPS)
15093 /* Compute the second operand into a scratch register, other
// Keep the divisor out of EAX:EDX (REG_PAIR_TMP), which the dividend needs.
15096 needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15098 /* Special case: if op2 is a local var we are done */
15100 if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
15102 addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15106 genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15108 noway_assert(op2->InReg());
15109 addrReg = genRegMask(op2->gtRegNum);
15112 /* Compute the first operand into EAX:EDX */
15114 genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
15115 noway_assert(op1->InReg());
15116 noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15118 /* And recover the second argument while locking the first one */
15120 addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15124 /* Compute the first operand into EAX:EDX */
// NOTE(review): on x86 REG_PAIR_EAXEDX and REG_PAIR_TMP appear to name the
// same pair, which is why the assert below checks REG_PAIR_TMP — confirm.
15126 genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
15127 noway_assert(op1->InReg());
15128 noway_assert(op1->gtRegPair == REG_PAIR_TMP);
15130 /* Compute the second operand into a scratch register, other
15133 needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
15135 /* Special case: if op2 is a local var we are done */
15137 if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
15139 addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
15143 genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
15145 noway_assert(op2->InReg());
15146 addrReg = genRegMask(op2->gtRegNum);
15149 /* Recover the first argument */
15151 genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
15153 /* And recover the second argument while locking the first one */
15155 addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
15158 /* At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum
15159 contains the 32bit divisor. We want to generate the following code:
15161 ==========================
15164 cmp edx, op2->gtRegNum
15175 ==========================
15176 This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
// i.e. first reduce the high dword modulo the divisor, then divide the
// combined (hi % c):lo value — neither DIV can then overflow.
15179 BasicBlock* lab_no_overflow = genCreateTempLabel();
15181 // grab a temporary register other than eax, edx, and op2->gtRegNum
15183 regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
15185 // EAX and tempReg will be trashed by the mov instructions. Doing
15186 // this early won't hurt, and might prevent confusion in genSetRegToIcon.
15188 regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
15189 regTracker.rsTrackRegTrash(tempReg);
// If hi(dividend) < divisor, the single DIV below cannot overflow — skip the
// pre-reduction of the high dword.
15191 inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
15192 inst_JMP(EJ_jb, lab_no_overflow);
// Overflow-possible path: save lo, divide 0:hi by the divisor so that
// EDX = hi % divisor, then restore lo for the second divide.
15194 inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
15195 inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15196 genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15197 inst_TT(INS_UNSIGNED_DIVIDE, op2);
15198 inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
15200 // Jump point for no overflow divide
15202 genDefineTempLabel(lab_no_overflow);
15204 // Issue the divide instruction
// DIV leaves quotient in EAX, remainder in EDX.
15206 inst_TT(INS_UNSIGNED_DIVIDE, op2);
15208 /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
15210 regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
15211 regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
15212 regTracker.rsTrackRegTrash(tempReg);
15213 regTracker.rsTrackRegTrash(op2->gtRegNum);
15215 if (tree->gtFlags & GTF_MOD_INT_RESULT)
15217 /* We don't need to normalize the result, because the caller wants
// Remainder stays in EDX; report the pair reversed so the consumer reads it.
15220 regPair = REG_PAIR_TMP_REVERSE;
15224 /* The result is now in EDX, we now have to normalize it, i.e. we have
15226 mov eax, edx; xor edx, edx (for UMOD)
15229 inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
15231 genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
15233 regPair = REG_PAIR_TMP;
// Release the operand registers/addressing modes we pinned above.
15236 genReleaseRegPair(op1);
15237 genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
15241 #else // !_TARGET_X86_
15243 NYI("codegen for LongModInt");
15245 return REG_PAIR_NONE;
15247 #endif // !_TARGET_X86_
15250 // Given a tree, return the number of registers that are currently
15251 // used to hold integer enregistered local variables.
15252 // Note that, an enregistered TYP_LONG can take 1 or 2 registers.
// Count how many integer registers are currently occupied by enregistered,
// live local variables (per compiler->compCurLife). Floating-point locals are
// excluded; an enregistered TYP_LONG/ULONG contributes 1 or 2 depending on
// whether its upper half is also in a register (partial enregistration).
15253 unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTree* tree)
15255 unsigned regCount = 0;
// Walk the tracked-variable indices that are live right now.
15257 VarSetOps::Iter iter(compiler, compiler->compCurLife);
15258 unsigned varNum = 0;
15259 while (iter.NextElem(&varNum))
// Map the tracked index back to the lvaTable slot for this local.
15261 unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
15262 LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
// Only integer (non-floating) locals that actually live in a register count.
15264 if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
15268 if (varTypeIsLong(varDsc->TypeGet()))
15270 // For enregistered LONG/ULONG, the lower half should always be in a register.
15271 noway_assert(varDsc->lvRegNum != REG_STK);
15273 // If the LONG/ULONG is NOT partially enregistered, then the higher half should be in a register as
15275 if (varDsc->lvOtherReg != REG_STK)
15286 /*****************************************************************************/
15287 /*****************************************************************************/
15288 #if CPU_HAS_FP_SUPPORT
15289 /*****************************************************************************
15291 * Generate code for a floating-point operation.
// Generate code for a floating-point tree. Delegates the actual FP codegen to
// genCodeForTreeFloat, then — for GT_RETURN only — emits the extra epilog
// work: the PInvoke method epilog (when the method calls unmanaged code and
// this is the unified return block) and the profiler leave callback.
// needReg/bestReg are register-mask hints forwarded to genCodeForTreeFloat.
15294 void CodeGen::genCodeForTreeFlt(GenTree* tree,
15295 regMaskTP needReg, /* = RBM_ALLFLOAT */
15296 regMaskTP bestReg) /* = RBM_NONE */
15298 genCodeForTreeFloat(tree, needReg, bestReg);
15300 if (tree->OperGet() == GT_RETURN)
15302 // Make sure to get ALL THE EPILOG CODE
15304 // TODO: this should be done AFTER we called exit mon so that
15305 // we are sure that we don't have to keep 'this' alive
15307 if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
15309 /* either it's an "empty" statement or the return statement
15310 of a synchronized method
15313 genPInvokeMethodEpilog();
15316 #ifdef PROFILING_SUPPORTED
15317 // The profiling hook does not trash registers, so it's safe to call after we emit the code for
15318 // the GT_RETURN tree.
15320 if (compiler->compCurBB == compiler->genReturnBB)
15322 genProfilingLeaveCallback();
15328 /*****************************************************************************/
15329 #endif // CPU_HAS_FP_SUPPORT
15331 /*****************************************************************************
15333 * Generate a table switch - the switch value (0-based) is in register 'reg'.
15336 void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
15338 unsigned jmpTabBase;
15342 // In debug code, we don't optimize away the trivial switch statements. So we can get here with a
15343 // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table.
15344 noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
15345 inst_JMP(EJ_jmp, jumpTab[0]);
15349 noway_assert(jumpCnt >= 2);
15351 /* Is the number of cases right for a test and jump switch? */
15353 const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
15354 const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
15355 const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
15357 unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
15359 // This means really just a single cmp/jcc (aka a simple if/else)
15360 if (fFirstCaseFollows || fDefaultFollows)
15361 minSwitchTabJumpCnt++;
15363 #ifdef _TARGET_ARM_
15364 // On the ARM for small switch tables we will
15365 // generate a sequence of compare and branch instructions
15366 // because the code to load the base of the switch
15367 // table is huge and hideous due to the relocation... :(
15369 minSwitchTabJumpCnt++;
15370 if (fHaveScratchReg)
15371 minSwitchTabJumpCnt++;
15373 #endif // _TARGET_ARM_
15375 bool useJumpSequence = jumpCnt < minSwitchTabJumpCnt;
15377 #if defined(_TARGET_UNIX_) && defined(_TARGET_ARM_)
15378 // Force using an inlined jumping instead switch table generation.
15379 // Switch jump table is generated with incorrect values in CoreRT case,
15380 // so any large switch will crash after loading to PC any such value.
15381 // I think this is due to the fact that we use absolute addressing
15382 // instead of relative. But in CoreRT is used as a rule relative
15383 // addressing when we generate an executable.
15384 // See also https://github.com/dotnet/coreclr/issues/13194
15385 useJumpSequence = useJumpSequence || compiler->IsTargetAbi(CORINFO_CORERT_ABI);
15386 #endif // defined(_TARGET_UNIX_) && defined(_TARGET_ARM_)
15388 if (useJumpSequence)
15390 /* Does the first case label follow? */
15391 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
15393 if (fFirstCaseFollows)
15395 /* Check for the default case */
15396 inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15397 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15398 inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15400 /* No need to jump to the first case */
15405 /* Generate a series of "dec reg; jmp label" */
15407 // Make sure that we can trash the register so
15408 // that we can generate a series of compares and jumps
15410 if ((jumpCnt > 0) && !fHaveScratchReg)
15412 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15413 inst_RV_RV(INS_mov, tmpReg, reg);
15414 regTracker.rsTrackRegTrash(tmpReg);
15418 while (jumpCnt > 0)
15420 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15421 inst_JMP(jmpEqual, *jumpTab++);
15427 /* Check for case0 first */
15428 instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
15429 inst_JMP(jmpEqual, *jumpTab);
15431 /* No need to jump to the first case or the default */
15436 /* Generate a series of "dec reg; jmp label" */
15438 // Make sure that we can trash the register so
15439 // that we can generate a series of compares and jumps
15441 if ((jumpCnt > 0) && !fHaveScratchReg)
15443 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
15444 inst_RV_RV(INS_mov, tmpReg, reg);
15445 regTracker.rsTrackRegTrash(tmpReg);
15449 while (jumpCnt > 0)
15451 inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
15452 inst_JMP(jmpEqual, *jumpTab++);
15456 if (!fDefaultFollows)
15458 inst_JMP(EJ_jmp, *jumpTab);
15462 if ((fFirstCaseFollows || fDefaultFollows) &&
15463 compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
15465 inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
15471 /* First take care of the default case */
15473 inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
15474 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
15475 inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
15477 /* Generate the jump table contents */
15479 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
15482 if (compiler->opts.dspCode)
15483 printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
15486 for (unsigned index = 0; index < jumpCnt - 1; index++)
15488 BasicBlock* target = jumpTab[index];
15490 noway_assert(target->bbFlags & BBF_JMP_TARGET);
15493 if (compiler->opts.dspCode)
15494 printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
15497 getEmitter()->emitDataGenData(index, target);
15500 getEmitter()->emitDataGenEnd();
15502 #ifdef _TARGET_ARM_
15503 // We need to load the address of the table into a register.
15504 // The data section might get placed a long distance away, so we
15505 // can't safely do a PC-relative ADR. :(
15506 // Pick any register except the index register.
15508 regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
15509 genMov32RelocatableDataLabel(jmpTabBase, regTabBase);
15510 regTracker.rsTrackRegTrash(regTabBase);
15512 // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2]
15513 getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
15515 #else // !_TARGET_ARM_
15517 getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
15522 /*****************************************************************************
15524 * Generate code for a switch statement.
// genCodeForSwitch: Generate code for a GT_SWITCH node.
//
// Evaluates the switch operand (gtOp1) into a register, fetches the jump
// table (count + destination blocks) from the current BBJ_SWITCH block, and
// hands off to genTableSwitch (or the x87 FP-stack variant when the FP stack
// is non-empty, so the table-switch code can account for FP-stack state).
//
// Arguments:
//    tree - the GT_SWITCH node; its operand must be int/native-int sized.
15527 void CodeGen::genCodeForSwitch(GenTree* tree)
15530 BasicBlock** jumpTab;
15535 noway_assert(tree->gtOper == GT_SWITCH);
15536 oper = tree->gtOp.gtOp1;
15537 noway_assert(genActualTypeIsIntOrI(oper->gtType));
15539 /* Get hold of the jump table */
15541 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
15543 jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
15544 jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
15546 /* Compute the switch value into some register */
15548 genCodeForTree(oper, 0);
15550 /* Get hold of the register the value is in */
15552 noway_assert(oper->InReg());
15553 reg = oper->gtRegNum;
15555 #if FEATURE_STACK_FP_X87
// If there are values live on the x87 FP stack, use the FP-aware variant so
// the stack can be popped/spilled consistently on every switch edge.
15556 if (!compCurFPState.IsEmpty())
15558 return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
15561 #endif // FEATURE_STACK_FP_X87
15563 return genTableSwitch(reg, jumpCnt, jumpTab);
15567 /*****************************************************************************/
15568 /*****************************************************************************
15569 * Emit a call to a helper function.
// genEmitHelperCall: Emit a call to a JIT helper function.
//
// Arguments:
//    helper  - the CorInfoHelpFunc id of the helper to call
//    argSize - number of bytes of arguments (for the emitter's stack tracking)
//    retSize - emitAttr describing the size/GC-ness of the return value
//
// Looks up the helper's entry point; if the VM returns an address it is
// called directly (EC_FUNC_TOKEN), otherwise the call goes through the
// indirection cell pAddr (EC_FUNC_TOKEN_INDIR). On ARM, addresses out of
// BL-immediate range are loaded into a scratch register and called
// indirectly. Current GC liveness (gcVarPtrSetCur / gcRegGCrefSetCur /
// gcRegByrefSetCur) is passed to the emitter for the call site, and
// callee-trashed registers are invalidated afterwards.
15573 void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
15575 // Can we call the helper function directly
15577 void *addr = NULL, **pAddr = NULL;
15579 #if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
15580 // Don't ask VM if it hasn't requested ELT hooks
// For the JIT-forced ELT hook case, the profiler method handle itself is the
// target instead of the usual VM-provided helper entry point.
15581 if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
15582 (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
15583 helper == CORINFO_HELP_PROF_FCN_TAILCALL))
15585 addr = compiler->compProfilerMethHnd;
15590 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
15593 #ifdef _TARGET_ARM_
// ARM: a direct BL has a limited immediate range; if the target is unknown or
// out of range, load it (or the indirection cell contents) into a register.
15594 if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
15596 // Load the address into a register and call through a register
15597 regNumber indCallReg =
15598 regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
15601 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
// No address: dereference the indirection cell pAddr into the call register.
15605 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
15606 regTracker.rsTrackRegTrash(indCallReg);
15609 getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
15610 INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
15611 argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
15612 gcInfo.gcRegByrefSetCur,
15613 BAD_IL_OFFSET, // ilOffset
15614 indCallReg, // ireg
15615 REG_NA, 0, 0, // xreg, xmul, disp
15617 emitter::emitNoGChelper(helper),
15618 (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
// Direct call: the helper address fits in a BL immediate.
15622 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
15623 INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
15624 gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
15625 0, /* ilOffset, ireg, xreg, xmul, disp */
15626 false, /* isJump */
15627 emitter::emitNoGChelper(helper),
15628 (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
// Non-ARM targets: direct token call, or indirect through pAddr when the VM
// did not hand back a resolved address.
15633 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
15637 callType = emitter::EC_FUNC_TOKEN_INDIR;
15641 getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
15642 argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
15643 gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
15644 0, /* ilOffset, ireg, xreg, xmul, disp */
15645 false, /* isJump */
15646 emitter::emitNoGChelper(helper));
// Helpers follow the normal calling convention: callee-trashed registers (and
// GC-interruptibility-sensitive state) are no longer valid after the call.
15650 regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
15651 regTracker.rsTrashRegsForGCInterruptability();
15654 /*****************************************************************************
15656 * Push the given argument list, right to left; returns the total amount of
15660 #if !FEATURE_FIXED_OUT_ARGS
15662 #pragma warning(push)
15663 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
// genPushArgList: Push the stack arguments of 'call' right-to-left (x86
// !FEATURE_FIXED_OUT_ARGS convention), evaluate late (register) args into
// their assigned registers, and reload any that were spilled in the process.
//
// Arguments:
//    call - the GT_CALL node whose arguments are being set up
//
// Return Value:
//    Total number of bytes pushed on the stack for this call's arguments.
//
// Notes:
//    GC liveness is maintained throughout via genUpdateLife /
//    genDoneAddressable so that pushed GC refs are reported correctly.
15665 size_t CodeGen::genPushArgList(GenTreeCall* call)
// Late args are the register-passed arguments, split out by fgMorph.
15667 GenTreeArgList* regArgs = call->gtCallLateArgs;
15671 GenTreeArgList* args;
15672 // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
15673 // so we can iterate over this argument list more uniformly.
15674 // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
15675 GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCallArgs);
15676 if (call->gtCallObjp == NULL)
15678 args = call->gtCallArgs;
15682 firstForObjp.Current() = call->gtCallObjp;
15683 args = &firstForObjp;
// Main loop: one iteration per argument; each either pushes stack bytes or
// records that the value will be placed in a register later.
15690 for (; args; args = args->Rest())
15692 addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
15694 /* Get hold of the next argument value */
15695 curr = args->Current();
// Placeholder nodes stand in for late args; nothing to do here.
15697 if (curr->IsArgPlaceHolderNode())
15699 assert(curr->gtFlags & GTF_LATE_ARG);
15705 // If we have a comma expression, eval the non-last, then deal with the last.
15706 if (!(curr->gtFlags & GTF_LATE_ARG))
15707 curr = genCodeForCommaTree(curr);
15709 /* See what type of a value we're passing */
15710 type = curr->TypeGet();
15712 opsz = genTypeSize(genActualType(type));
15722 /* Don't want to push a small value, make it a full word */
15724 genCodeForTree(curr, 0);
15726 __fallthrough; // now the value should be in a register ...
15731 #if !CPU_HAS_FP_SUPPORT
15735 if (curr->gtFlags & GTF_LATE_ARG)
15737 assert(curr->gtOper == GT_ASG);
15738 /* one more argument will be passed in a register */
15739 noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
15741 /* arg is passed in the register, nothing on the stack */
15746 /* Is this value a handle? */
15748 if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
15750 /* Emit a fixup for the push instruction */
15752 inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
15759 /* Is the value a constant? */
15761 if (curr->gtOper == GT_CNS_INT)
// Prefer pushing a register already holding the constant: smaller encoding.
15765 regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
15769 inst_RV(INS_push, reg, TYP_INT);
15774 inst_IV(INS_push, curr->gtIntCon.gtIconVal);
15777 /* If the type is TYP_REF, then this must be a "null". So we can
15778 treat it as a TYP_INT as we don't need to report it as a GC ptr */
15780 noway_assert(curr->TypeGet() == TYP_INT ||
15781 (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
15789 if (curr->gtFlags & GTF_LATE_ARG)
15791 /* This must be a register arg temp assignment */
15793 noway_assert(curr->gtOper == GT_ASG);
15795 /* Evaluate it to the temp */
15797 genCodeForTree(curr, 0);
15799 /* Increment the current argument register counter */
15801 intRegState.rsCurRegArgNum++;
15807 /* This is a 32-bit integer non-register argument */
15809 addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
15810 inst_TT(INS_push, curr);
15812 genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
15817 #if !CPU_HAS_FP_SUPPORT
15821 /* Is the value a constant? */
15823 if (curr->gtOper == GT_CNS_LNG)
// 64-bit value is pushed as two 32-bit halves, high half first so the low
// half ends up at the lower stack address.
15825 inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
15827 inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
15834 addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
15836 inst_TT(INS_push, curr, sizeof(int));
15838 inst_TT(INS_push, curr);
15843 #if CPU_HAS_FP_SUPPORT
15847 #if FEATURE_STACK_FP_X87
// Floating-point args are handled by the x87 FP-stack code generator.
15848 addrReg = genPushArgumentStackFP(curr);
15857 /* Is this a nothing node, deferred register argument? */
15859 if (curr->gtFlags & GTF_LATE_ARG)
// Evaluate any side effects buried in COMMA wrappers; the actual value is
// produced later as a late arg.
15861 GenTree* arg = curr;
15862 if (arg->gtOper == GT_COMMA)
15864 while (arg->gtOper == GT_COMMA)
15866 GenTree* op1 = arg->gtOp.gtOp1;
15867 genEvalSideEffects(op1);
15868 genUpdateLife(op1);
15869 arg = arg->gtOp.gtOp2;
15871 if (!arg->IsNothingNode())
15873 genEvalSideEffects(arg);
15874 genUpdateLife(arg);
15878 /* increment the register count and continue with the next argument */
15880 intRegState.rsCurRegArgNum++;
15882 noway_assert(opsz == 0);
// Struct argument: strip COMMA wrappers to reach the GT_OBJ / GT_MKREFANY.
15892 GenTree* arg = curr;
15893 while (arg->gtOper == GT_COMMA)
15895 GenTree* op1 = arg->gtOp.gtOp1;
15896 genEvalSideEffects(op1);
15897 genUpdateLife(op1);
15898 arg = arg->gtOp.gtOp2;
15901 noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
15902 noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
15903 noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
// GT_MKREFANY: a TypedReference is two slots - type handle (op2) pushed
// first, then the data pointer (op1).
15905 if (arg->gtOper == GT_MKREFANY)
15907 GenTree* op1 = arg->gtOp.gtOp1;
15908 GenTree* op2 = arg->gtOp.gtOp2;
15910 addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
15912 /* Is this value a handle? */
15913 if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
15915 /* Emit a fixup for the push instruction */
15917 inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
15922 regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
15923 inst_TT(INS_push, op2);
15925 genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
15927 addrReg = genKeepAddressable(op1, addrReg);
15928 inst_TT(INS_push, op1);
15930 genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
15932 opsz = 2 * TARGET_POINTER_SIZE;
15936 noway_assert(arg->gtOper == GT_OBJ);
// Fast path: GT_OBJ(ADDR(LCL_VAR)) of an independently-promoted struct local
// can push the promoted field variables directly, avoiding a block copy.
15938 if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
15940 GenTree* structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
15941 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
15942 LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
15944 // As much as we would like this to be a noway_assert, we can't because
15945 // there are some weird casts out there, and backwards compatibility
15946 // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
15947 // lvPromoted in general currently do not require the local to be
15948 // TYP_STRUCT, so this assert is really more about how we wish the world
15949 // was than some JIT invariant.
15950 assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
15952 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
15954 if (varDsc->lvPromoted &&
15956 Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
15958 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
15962 // Get the number of BYTES to copy to the stack
15963 opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass),
15964 TARGET_POINTER_SIZE);
15965 size_t bytesToBeCopied = opsz;
15967 // postponedFields is true if we have any postponed fields
15968 // Any field that does not start on a 4-byte boundary is a postponed field
15969 // Such a field is required to be a short or a byte
15971 // postponedRegKind records the kind of scratch register we will
15972 // need to process the postponed fields
15973 // RBM_NONE means that we don't need a register
15975 // expectedAlignedOffset records the aligned offset that
15976 // has to exist for a push to cover the postponed fields.
15977 // Since all promoted structs have the tightly packed property
15978 // we are guaranteed that we will have such a push
15980 bool postponedFields = false;
15981 regMaskTP postponedRegKind = RBM_NONE;
15982 size_t expectedAlignedOffset = UINT_MAX;
15984 VARSET_TP* deadVarBits = NULL;
15985 compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
15987 // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
15989 for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
15990 varNum >= (int)varDsc->lvFieldLclStart; varNum--)
15992 LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
// 8-byte fields (LONG/DOUBLE) must be 8-aligned and at the current tail.
15994 if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
15996 noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
15997 noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
16000 // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
16001 // For packed structs we will go back and store the unaligned bytes and shorts
16002 // in the next loop
16004 if (fieldVarDsc->lvStackAligned())
16006 if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
16007 fieldVarDsc->lvFldOffset + (unsigned)TARGET_POINTER_SIZE != bytesToBeCopied)
16009 // Might need 4-bytes paddings for fields other than LONG and DOUBLE.
16010 // Just push some junk (i.e EAX) on the stack.
16011 inst_RV(INS_push, REG_EAX, TYP_INT);
16014 bytesToBeCopied -= TARGET_POINTER_SIZE;
16017 // If we have an expectedAlignedOffset make sure that this push instruction
16018 // is what we expect to cover the postponedFields
16020 if (expectedAlignedOffset != UINT_MAX)
16022 // This push must be for a small field
16023 noway_assert(fieldVarDsc->lvExactSize < 4);
16024 // The fldOffset for this push should be equal to the expectedAlignedOffset
16025 noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
16026 expectedAlignedOffset = UINT_MAX;
16029 // Push the "upper half" of LONG var first
16031 if (isRegPairType(fieldVarDsc->lvType))
16033 if (fieldVarDsc->lvOtherReg != REG_STK)
16035 inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
16038 // Prepare the set of vars to be cleared from gcref/gcbyref set
16039 // in case they become dead after genUpdateLife.
16040 // genDoneAddressable() will remove dead gc vars by calling
16041 // gcInfo.gcMarkRegSetNpt.
16042 // Although it is not addrReg, we just borrow the name here.
16043 addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
16047 getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, TARGET_POINTER_SIZE);
16051 bytesToBeCopied -= TARGET_POINTER_SIZE;
16054 // Push the "upper half" of DOUBLE var if it is not enregistered.
16056 if (fieldVarDsc->lvType == TYP_DOUBLE)
16058 if (!fieldVarDsc->lvRegister)
16060 getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, TARGET_POINTER_SIZE);
16064 bytesToBeCopied -= TARGET_POINTER_SIZE;
16068 // Push the field local.
16071 if (fieldVarDsc->lvRegister)
16073 if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
16075 inst_RV(INS_push, fieldVarDsc->lvRegNum,
16076 genActualType(fieldVarDsc->TypeGet()));
16079 // Prepare the set of vars to be cleared from gcref/gcbyref set
16080 // in case they become dead after genUpdateLife.
16081 // genDoneAddressable() will remove dead gc vars by calling
16082 // gcInfo.gcMarkRegSetNpt.
16083 // Although it is not addrReg, we just borrow the name here.
16084 addrReg |= genRegMask(fieldVarDsc->lvRegNum);
16088 // Must be TYP_FLOAT or TYP_DOUBLE
16089 noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
16091 noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
16092 fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
// Enregistered FP field: make room on the stack, then fstp into that slot.
16094 inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
16097 if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
16102 #if FEATURE_STACK_FP_X87
// Build a transient GT_REG_VAR node so the x87 stack machinery can locate
// (and, if this is a last use, kill) the enregistered FP field.
16103 GenTree* fieldTree = new (compiler, GT_REG_VAR)
16104 GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
16105 fieldTree->gtOper = GT_REG_VAR;
16106 fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
16107 fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
16108 if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
16110 if (fieldVarDsc->lvTracked &&
16111 (deadVarBits == NULL ||
16112 VarSetOps::IsMember(compiler, *deadVarBits,
16113 fieldVarDsc->lvVarIndex)))
16115 fieldTree->gtFlags |= GTF_VAR_DEATH;
16118 genCodeForTreeStackFP_Leaf(fieldTree);
16120 // Take reg to top of stack
16122 FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
16124 // Pop it off to stack
16125 compCurFPState.Pop();
16127 getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
16128 REG_NA, REG_SPBASE, 0);
16130 NYI_FLAT_FP_X87("FP codegen");
// Not enregistered: push directly from the field's stack home, with the
// correct GC attribute so the pushed slot is reported to the GC.
16136 getEmitter()->emitIns_S(INS_push,
16137 (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
16143 bytesToBeCopied -= TARGET_POINTER_SIZE;
16145 else // not stack aligned
16147 noway_assert(fieldVarDsc->lvExactSize < 4);
16149 // We will need to use a store byte or store word
16150 // to set this unaligned location
16151 postponedFields = true;
16153 if (expectedAlignedOffset != UINT_MAX)
16155 // This should never change until it is set back to UINT_MAX by an aligned
16157 noway_assert(expectedAlignedOffset ==
16158 roundUp(fieldVarDsc->lvFldOffset, TARGET_POINTER_SIZE) -
16159 TARGET_POINTER_SIZE);
16162 expectedAlignedOffset =
16163 roundUp(fieldVarDsc->lvFldOffset, TARGET_POINTER_SIZE) - TARGET_POINTER_SIZE;
16165 noway_assert(expectedAlignedOffset < bytesToBeCopied);
16167 if (fieldVarDsc->lvRegister)
16169 // Do we need to use a byte-able register?
16170 if (fieldVarDsc->lvExactSize == 1)
16172 // Did we enregister fieldVarDsc2 in a non byte-able register?
16173 if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
16175 // then we will need to grab a byte-able register
16176 postponedRegKind = RBM_BYTE_REGS;
16180 else // not enregistered
16182 if (fieldVarDsc->lvExactSize == 1)
16184 // We will need to grab a byte-able register
16185 postponedRegKind = RBM_BYTE_REGS;
16189 // We will need to grab any scratch register
16190 if (postponedRegKind != RBM_BYTE_REGS)
16191 postponedRegKind = RBM_ALLINT;
16197 // Now we've pushed all of the aligned fields.
16199 // We should have pushed bytes equal to the entire struct
16200 noway_assert(bytesToBeCopied == 0);
16202 // We should have seen a push that covers every postponed field
16203 noway_assert(expectedAlignedOffset == UINT_MAX);
16205 // Did we have any postponed fields?
16206 if (postponedFields)
16208 regNumber regNum = REG_STK; // means no register
16210 // If we needed a scratch register then grab it here
16212 if (postponedRegKind != RBM_NONE)
16213 regNum = regSet.rsGrabReg(postponedRegKind);
16215 // Forward loop, starts from the lowest field offset
16217 for (unsigned varNum = varDsc->lvFieldLclStart;
16218 varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
16220 LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
16222 // All stack aligned fields have already been pushed
16223 if (fieldVarDsc->lvStackAligned())
16226 // We have a postponed field
16228 // It must be a byte or a short
16229 noway_assert(fieldVarDsc->lvExactSize < 4);
16231 // Is the field enregistered?
16232 if (fieldVarDsc->lvRegister)
16234 // Frequently we can just use that register
16235 regNumber tmpRegNum = fieldVarDsc->lvRegNum;
16237 // Do we need to use a byte-able register?
16238 if (fieldVarDsc->lvExactSize == 1)
16240 // Did we enregister the field in a non byte-able register?
16241 if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
16243 // then we will need to use the byte-able register 'regNum'
16244 noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
16246 // Copy the register that contains fieldVarDsc into 'regNum'
16247 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
16248 fieldVarDsc->lvRegNum);
16249 regTracker.rsTrackRegLclVar(regNum, varNum);
16251 // tmpRegNum is the register that we will extract the byte value from
16252 tmpRegNum = regNum;
16254 noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
// Store the small field into its exact offset within the already-pushed area.
16257 getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
16258 (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
16259 REG_SPBASE, fieldVarDsc->lvFldOffset);
16261 else // not enregistered
16263 // We will copy the non-enregister fieldVar into our scratch register 'regNum'
16265 noway_assert(regNum != REG_STK);
16266 getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
16267 (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
16270 regTracker.rsTrackRegLclVar(regNum, varNum);
16272 // Store the value (byte or short) into the stack
16274 getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
16275 (emitAttr)fieldVarDsc->lvExactSize, regNum,
16276 REG_SPBASE, fieldVarDsc->lvFldOffset);
// Report any field deaths after the whole struct use.
16280 genUpdateLife(structLocalTree);
// General GT_OBJ path: evaluate the struct's address into a register, then
// copy it to the stack by pushes (or SSE2 movq for large non-GC structs).
16286 genCodeForTree(arg->gtObj.gtOp1, 0);
16287 noway_assert(arg->gtObj.gtOp1->InReg());
16288 regNumber reg = arg->gtObj.gtOp1->gtRegNum;
16289 // Get the number of DWORDS to copy to the stack
16290 opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(DWORD));
16291 unsigned slots = (unsigned)(opsz / sizeof(DWORD));
16293 BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16295 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
16297 BOOL bNoneGC = TRUE;
16298 for (int i = slots - 1; i >= 0; --i)
16300 if (gcLayout[i] != TYPE_GC_NONE)
16307 /* passing large structures using movq instead of pushes does not increase codesize very much */
16308 unsigned movqLenMin = 8;
16309 unsigned movqLenMax = 64;
16310 unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
16312 if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
16314 // Don't bother with this optimization in
16315 // rarely run blocks or when optimizing for size
16316 movqLenMax = movqLenMin = 0;
16318 else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
16320 // Be more aggressive when optimizing for speed
16324 /* Adjust for BB weight */
16325 if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
16327 // Be more aggressive when we are inside a loop
16331 if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
16333 JITLOG_THIS(compiler, (LL_INFO10000,
16334 "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
16335 opsz, compiler->info.compFullName));
16337 int stkDisp = (int)(unsigned)opsz;
16339 regNumber xmmReg = REG_XMM0;
// An odd trailing pointer-sized slot is pushed individually first.
16343 stkDisp -= TARGET_POINTER_SIZE;
16344 getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
// Reserve the rest of the space, then copy it 8 bytes at a time via XMM.
16348 inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
16349 AddStackLevel(stkDisp);
16351 while (curDisp < stkDisp)
16353 getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
16354 getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
16355 curDisp += 2 * TARGET_POINTER_SIZE;
16357 noway_assert(curDisp == stkDisp);
// Push path: highest slot first; each push uses the slot's GC attribute so
// the emitter reports pushed object/byref pointers correctly.
16361 for (int i = slots - 1; i >= 0; --i)
16363 emitAttr fieldSize;
16364 if (gcLayout[i] == TYPE_GC_NONE)
16365 fieldSize = EA_4BYTE;
16366 else if (gcLayout[i] == TYPE_GC_REF)
16367 fieldSize = EA_GCREF;
16370 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16371 fieldSize = EA_BYREF;
16373 getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * TARGET_POINTER_SIZE);
16377 gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
16385 noway_assert(!"unhandled/unexpected arg type");
16386 NO_WAY("unhandled/unexpected arg type");
16389 /* Update the current set of live variables */
16391 genUpdateLife(curr);
16393 /* Update the current set of register pointers */
16395 noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
16396 genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
16398 /* Remember how much stuff we've pushed on the stack */
16402 /* Update the current argument stack offset */
16404 /* Continue with the next argument, if any more are present */
16408 /* Move the deferred arguments to registers */
// Second pass: place each late arg in its assigned argument register.
16410 for (args = regArgs; args; args = args->Rest())
16412 curr = args->Current();
16414 assert(!curr->IsArgPlaceHolderNode()); // No placeholder nodes are in the late args
16416 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16417 assert(curArgTabEntry);
16418 regNumber regNum = curArgTabEntry->regNum;
16420 noway_assert(isRegParamType(curr->TypeGet()));
16421 noway_assert(curr->gtType != TYP_VOID);
16423 /* Evaluate the argument to a register [pair] */
16425 if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
16427 /* Check if this is the guess area for the resolve interface call
16428 * Pass a size of EA_OFFSET*/
16429 if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
16431 getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
16432 regTracker.rsTrackRegTrash(regNum);
16434 /* The value is now in the appropriate register */
16436 genMarkTreeInReg(curr, regNum);
16440 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
16443 noway_assert(curr->gtRegNum == regNum);
16445 /* If the register is already marked as used, it will become
16446 multi-used. However, since it is a callee-trashed register,
16447 we will have to spill it before the call anyway. So do it now */
16449 if (regSet.rsMaskUsed & genRegMask(regNum))
16451 noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
16452 regSet.rsSpillReg(regNum);
16455 /* Mark the register as 'used' */
16457 regSet.rsMarkRegUsed(curr);
16461 noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
16465 /* If any of the previously loaded arguments were spilled - reload them */
// Third pass: evaluating a later arg may have spilled an earlier one; bring
// every late arg back into its register (pair) before the call.
16467 for (args = regArgs; args; args = args->Rest())
16469 curr = args->Current();
16472 if (curr->gtFlags & GTF_SPILLED)
16474 if (isRegPairType(curr->gtType))
16476 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
16480 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
16485 /* Return the total size pushed */
16490 #pragma warning(pop)
16493 #else // FEATURE_FIXED_OUT_ARGS
16496 // ARM and AMD64 uses this method to pass the stack based args
16498 // returns size pushed (always zero)
16499 size_t CodeGen::genPushArgList(GenTreeCall* call)
16501 GenTreeArgList* lateArgs = call->gtCallLateArgs;
16506 GenTreeArgList* args;
16507 // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
16508 // so we can iterate over this argument list more uniformly.
16509 // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
16510 GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCallArgs);
16511 if (call->gtCallObjp == NULL)
16513 args = call->gtCallArgs;
16517 objpArgList.Current() = call->gtCallObjp;
16518 args = &objpArgList;
16521 for (; args; args = args->Rest())
16523 /* Get hold of the next argument value */
16524 curr = args->Current();
16526 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
16527 assert(curArgTabEntry);
16528 regNumber regNum = curArgTabEntry->regNum;
16529 int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
16531 /* See what type of a value we're passing */
16532 type = curr->TypeGet();
16534 if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
16539 // This holds the set of registers corresponding to enregistered promoted struct field variables
16540 // that go dead after this use of the variable in the argument list.
16541 regMaskTP deadFieldVarRegs = RBM_NONE;
16543 argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
16545 if (curr->IsArgPlaceHolderNode())
16547 assert(curr->gtFlags & GTF_LATE_ARG);
16551 if (varTypeIsSmall(type))
16553 // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
16563 #if defined(_TARGET_ARM_)
16565 argSize = (TARGET_POINTER_SIZE * 2);
16567 /* Is the value a constant? */
16569 if (curr->gtOper == GT_CNS_LNG)
16571 assert((curr->gtFlags & GTF_LATE_ARG) == 0);
16573 int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
16574 int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
16576 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
16578 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
16585 genCodeForTree(curr, 0);
16587 if (curr->gtFlags & GTF_LATE_ARG)
16589 // The arg was assigned into a temp and
16590 // will be moved to the correct register or slot later
16592 argSize = 0; // nothing is passed on the stack
16596 // The arg is passed in the outgoing argument area of the stack frame
16598 assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
16599 assert(curr->InReg()); // should be enregistered after genCodeForTree(curr, 0)
16601 if (type == TYP_LONG)
16603 regNumber regLo = genRegPairLo(curr->gtRegPair);
16604 regNumber regHi = genRegPairHi(curr->gtRegPair);
16606 assert(regLo != REG_STK);
16607 inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
16608 if (regHi == REG_STK)
16610 regHi = regSet.rsPickFreeReg();
16611 inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
16612 regTracker.rsTrackRegTrash(regHi);
16614 inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
16616 else // (type == TYP_DOUBLE)
16618 inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
16624 #elif defined(_TARGET_64BIT_)
16627 #error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
16635 /* Is the value a constant? */
16637 if (curr->gtOper == GT_CNS_INT)
16639 assert(!(curr->gtFlags & GTF_LATE_ARG));
16642 regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
16646 inst_SA_RV(ins_Store(type), argOffset, reg, type);
16651 GenTreeIntConCommon* con = curr->AsIntConCommon();
16652 bool needReloc = con->ImmedValNeedsReloc(compiler);
16653 emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
16655 instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
16656 compiler->lvaOutgoingArgSpaceVar, argOffset);
16661 /* This is passed as a pointer-sized integer argument */
16663 genCodeForTree(curr, 0);
16665 // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
16666 if (curr->gtFlags & GTF_LATE_ARG)
16668 #ifdef _TARGET_ARM_
16669 argSize = 0; // nothing is passed on the stack
16674 // The arg is passed in the outgoing argument area of the stack frame
16676 assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
16677 assert(curr->InReg()); // should be enregistered after genCodeForTree(curr, 0)
16678 inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
16680 if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
16681 gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
16686 /* Is this a nothing node, deferred register argument? */
16688 if (curr->gtFlags & GTF_LATE_ARG)
16690 /* Handle side-effects */
16692 if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
16694 #ifdef _TARGET_ARM_
16696 GenTree* curArgNode = curArgTabEntry->node;
16697 var_types curRegArgType = curArgNode->gtType;
16698 assert(curRegArgType != TYP_UNDEF);
16700 if (curRegArgType == TYP_STRUCT)
16702 // If the RHS of the COPYBLK is a promoted struct local, then the use of that
16703 // is an implicit use of all its field vars. If these are last uses, remember that,
16704 // so we can later update the GC compiler->info.
16705 if (curr->OperIsCopyBlkOp())
16706 deadFieldVarRegs |= genFindDeadFieldRegs(curr);
16709 #endif // _TARGET_ARM_
16711 genCodeForTree(curr, 0);
16715 assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
16718 #if defined(_TARGET_ARM_)
16719 argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
16724 for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
16726 GenTree* op1 = arg->gtOp.gtOp1;
16728 genEvalSideEffects(op1);
16729 genUpdateLife(op1);
16734 #ifdef _TARGET_ARM_
16738 GenTree* arg = curr;
16739 while (arg->gtOper == GT_COMMA)
16741 GenTree* op1 = arg->gtOp.gtOp1;
16742 genEvalSideEffects(op1);
16743 genUpdateLife(op1);
16744 arg = arg->gtOp.gtOp2;
16746 noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
16748 CORINFO_CLASS_HANDLE clsHnd;
16751 BYTE* gcLayout = NULL;
16753 // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
16754 // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
16755 // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
16756 // table entry for the promoted struct local. As we fill slots with the contents of a
16757 // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
16758 // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
16759 // variable number of the next field variable to be copied.
16760 LclVarDsc* promotedStructLocalVarDesc = NULL;
16761 GenTree* structLocalTree = NULL;
16762 unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
16763 unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
16764 unsigned promotedStructOffsetOfFirstStackSlot = 0;
16765 unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
16767 if (arg->OperGet() == GT_OBJ)
16769 clsHnd = arg->gtObj.gtClass;
16770 unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
16772 roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
16773 argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
16775 slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
16777 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
16779 compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
16781 // Are we loading a promoted struct local var?
16782 if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
16784 structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
16785 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
16786 LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
16788 // As much as we would like this to be a noway_assert, we can't because
16789 // there are some weird casts out there, and backwards compatiblity
16790 // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
16791 // lvPromoted in general currently do not require the local to be
16792 // TYP_STRUCT, so this assert is really more about how we wish the world
16793 // was then some JIT invariant.
16794 assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
16796 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
16798 if (varDsc->lvPromoted &&
16799 promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
16802 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
16803 promotedStructLocalVarDesc = varDsc;
16804 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
16810 noway_assert(arg->OperGet() == GT_MKREFANY);
16813 argAlign = TARGET_POINTER_SIZE;
16814 argSize = 2 * TARGET_POINTER_SIZE;
16818 // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
16819 noway_assert(regNum == REG_STK);
16821 // This code passes a TYP_STRUCT by value using the outgoing arg space var
16823 if (arg->OperGet() == GT_OBJ)
16825 regNumber regSrc = REG_STK;
16826 regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
16827 int cStackSlots = 0;
16829 if (promotedStructLocalVarDesc == NULL)
16831 genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
16832 noway_assert(arg->gtObj.gtOp1->InReg());
16833 regSrc = arg->gtObj.gtOp1->gtRegNum;
16836 // The number of bytes to add "argOffset" to get the arg offset of the current slot.
16837 int extraArgOffset = 0;
16839 for (unsigned i = 0; i < slots; i++)
16841 emitAttr fieldSize;
16842 if (gcLayout[i] == TYPE_GC_NONE)
16843 fieldSize = EA_PTRSIZE;
16844 else if (gcLayout[i] == TYPE_GC_REF)
16845 fieldSize = EA_GCREF;
16848 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
16849 fieldSize = EA_BYREF;
16852 // Pass the argument using the lvaOutgoingArgSpaceVar
16854 if (promotedStructLocalVarDesc != NULL)
16856 if (argOffsetOfFirstStackSlot == UINT32_MAX)
16857 argOffsetOfFirstStackSlot = argOffset;
16859 regNumber maxRegArg = regNumber(MAX_REG_ARG);
16860 bool filledExtraSlot = genFillSlotFromPromotedStruct(
16861 arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
16862 &bytesOfNextSlotOfCurPromotedStruct,
16863 /*pCurRegNum*/ &maxRegArg,
16864 /*argOffset*/ argOffset + extraArgOffset,
16865 /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
16866 argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp);
16867 extraArgOffset += TARGET_POINTER_SIZE;
16868 // If we filled an extra slot with an 8-byte value, skip a slot.
16869 if (filledExtraSlot)
16873 extraArgOffset += TARGET_POINTER_SIZE;
16878 if (regTmp == REG_STK)
16880 regTmp = regSet.rsPickFreeReg();
16883 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
16884 i * TARGET_POINTER_SIZE);
16886 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
16887 compiler->lvaOutgoingArgSpaceVar,
16888 argOffset + cStackSlots * TARGET_POINTER_SIZE);
16889 regTracker.rsTrackRegTrash(regTmp);
16894 if (promotedStructLocalVarDesc == NULL)
16896 regSet.rsMarkRegFree(genRegMask(regSrc));
16898 if (structLocalTree != NULL)
16899 genUpdateLife(structLocalTree);
16903 assert(arg->OperGet() == GT_MKREFANY);
16904 PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
16905 argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
16909 #endif // _TARGET_ARM_
16912 assert(!"unhandled/unexpected arg type");
16913 NO_WAY("unhandled/unexpected arg type");
16916 /* Update the current set of live variables */
16918 genUpdateLife(curr);
16920 // Now, if some copied field locals were enregistered, and they're now dead, update the set of
16921 // register holding gc pointers.
16922 if (deadFieldVarRegs != 0)
16923 gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
16925 /* Update the current argument stack offset */
16927 argOffset += argSize;
16929 /* Continue with the next argument, if any more are present */
16934 SetupLateArgs(call);
16937 /* Return the total size pushed */
16942 #ifdef _TARGET_ARM_
//------------------------------------------------------------------------
// genFillSlotFromPromotedStruct: Fill the next pointer-sized slot of a struct
// argument from the field(s) of an independently-promoted struct local.
// The destination is either the next argument register (*pCurRegNum, while it
// is still < MAX_REG_ARG) or the outgoing argument area of the stack frame.
// The "p..." in/out pointer parameters carry the copy cursor across calls:
// the next field var to copy, the byte offset at which the next slot begins,
// and the next destination register. *pRegTmp is a scratch register chosen
// lazily on first need; registers of enregistered field vars that die at this
// use are accumulated into *deadFieldVarRegs.
//
// Returns true when an 8-byte field (TYP_LONG / TYP_DOUBLE) consumed a second
// slot, so the caller should treat two slots as filled; false otherwise.
16943 bool CodeGen::genFillSlotFromPromotedStruct(GenTree* arg,
16944 fgArgTabEntry* curArgTabEntry,
16945 LclVarDsc* promotedStructLocalVarDesc,
16946 emitAttr fieldSize,
16947 unsigned* pNextPromotedStructFieldVar,
16948 unsigned* pBytesOfNextSlotOfCurPromotedStruct,
16949 regNumber* pCurRegNum,
16951 int fieldOffsetOfFirstStackSlot,
16952 int argOffsetOfFirstStackSlot,
16953 regMaskTP* deadFieldVarRegs,
16954 regNumber* pRegTmp)
// Local copies of the in/out cursor parameters; written back before returning
// (except on the early-out just below).
16956 unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
16957 unsigned limitPromotedStructFieldVar =
16958 promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
16959 unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
16961 regNumber curRegNum = *pCurRegNum;
16962 regNumber regTmp = *pRegTmp;
16963 bool filledExtraSlot = false;
16965 if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
16967 // We've already finished; just return.
16968 // We can reach this because the calling loop computes a # of slots based on the size of the struct.
16969 // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
16970 // the fourth slot, even though we've copied all the fields.
16974 LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
16976 // Does this field fill an entire slot, and does it go at the start of the slot?
16977 // If so, things are easier...
16979 bool oneFieldFillsSlotFromStart =
16980 (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
16981 && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and...
16982 && (nextPromotedStructFieldVar + 1 ==
16983 limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
16984 || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
16986 // Compute the proper size.
16987 if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
16989 switch (fieldVarDsc->lvExactSize)
16992 fieldSize = EA_1BYTE;
16995 fieldSize = EA_2BYTE;
16998 // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
16999 // in which case we should not have promoted the struct variable.
17000 noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
17002 // If the current reg number is not aligned, align it, and return to the calling loop, which will
17003 // consider that a filled slot and move on to the next argument register.
17004 if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
17006 // We must update the slot target, however!
17007 bytesOfNextSlotOfCurPromotedStruct += 4;
17008 *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
17011 // Dest is an aligned pair of arg regs, if the struct type demands it.
17012 noway_assert((curRegNum % 2) == 0);
17013 // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
17016 assert(fieldVarDsc->lvExactSize == 4);
17022 // If the gc layout said it's a GC ref or byref, then the field size must be 4.
17023 noway_assert(fieldVarDsc->lvExactSize == 4);
17026 // We may need the type of the field to influence instruction selection.
17027 // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
17028 // If the fieldVarDsc is enregistered float we must use the field's exact type
17029 // however if it is in memory we can use an integer type TYP_I_IMPL
17031 var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
17032 if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
17034 fieldTypeForInstr = TYP_I_IMPL;
17037 // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
17038 if (curArgTabEntry->isHfaRegArg)
17040 assert(oneFieldFillsSlotFromStart);
17042 // Is the field variable promoted?
17043 if (fieldVarDsc->lvRegister)
17045 // Move the field var living in register to dst, if they are different registers.
17046 regNumber srcReg = fieldVarDsc->lvRegNum;
17047 regNumber dstReg = curRegNum;
17048 if (srcReg != dstReg)
17050 inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
17051 assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
17056 // Move the field var living in stack to dst.
17057 getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
17058 fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
17059 nextPromotedStructFieldVar, 0);
17060 assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
17063 // Mark the arg as used and using reg val.
17064 genMarkTreeInReg(arg, curRegNum);
17065 regSet.SetUsedRegFloat(arg, true);
17067 // Advance for double.
17068 if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
17070 bytesOfNextSlotOfCurPromotedStruct += 4;
17071 curRegNum = REG_NEXT(curRegNum);
17072 arg->gtRegNum = curRegNum;
17073 regSet.SetUsedRegFloat(arg, true);
17074 filledExtraSlot = true;
17076 arg->gtRegNum = curArgTabEntry->regNum;
17079 bytesOfNextSlotOfCurPromotedStruct += 4;
17080 nextPromotedStructFieldVar++;
// Non-HFA case 1: a single field occupies the whole current slot from its start.
17084 if (oneFieldFillsSlotFromStart)
17086 // If we write to the stack, offset in outgoing args at which we'll write.
17087 int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
17088 assert(fieldArgOffset >= 0);
17090 // Is the source a register or memory?
17091 if (fieldVarDsc->lvRegister)
17093 if (fieldTypeForInstr == TYP_DOUBLE)
17095 fieldSize = EA_8BYTE;
17098 // Are we writing to a register or to the stack?
17099 if (curRegNum != MAX_REG_ARG)
17101 // Source is register and Dest is register.
17103 instruction insCopy = INS_mov;
17105 if (varTypeIsFloating(fieldTypeForInstr))
17107 if (fieldTypeForInstr == TYP_FLOAT)
17109 insCopy = INS_vmov_f2i;
17113 assert(fieldTypeForInstr == TYP_DOUBLE);
17114 insCopy = INS_vmov_d2i;
17118 // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second
17119 // register (which may become a tmp register, if its held in the argument register that the first
17120 // register to be copied will overwrite).
17121 regNumber otherRegNum = REG_STK;
17122 if (fieldVarDsc->lvType == TYP_LONG)
17124 otherRegNum = fieldVarDsc->lvOtherReg;
17125 // Are we about to overwrite?
17126 if (otherRegNum == curRegNum)
17128 if (regTmp == REG_STK)
17130 regTmp = regSet.rsPickFreeReg();
17132 // Copy the second register to the temp reg.
17133 getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
17134 regTracker.rsTrackRegCopy(regTmp, otherRegNum);
17135 otherRegNum = regTmp;
17139 if (fieldVarDsc->lvType == TYP_DOUBLE)
// Move the double's 64 bits into the core register pair (curRegNum, next arg reg)
// with a single vmov_d2i.
17141 assert(curRegNum <= REG_R2);
17142 getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
17143 fieldVarDsc->lvRegNum);
17144 regTracker.rsTrackRegTrash(curRegNum);
17145 regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
17149 // Now do the first register.
17150 // It might be the case that it's already in the desired register; if so do nothing.
17151 if (curRegNum != fieldVarDsc->lvRegNum)
17153 getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
17154 regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
17158 // In either case, mark the arg register as used.
17159 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17161 // Is there a second half of the value?
17162 if (fieldVarDsc->lvExactSize == 8)
17164 curRegNum = genRegArgNext(curRegNum);
17165 // The second dest reg must also be an argument register.
17166 noway_assert(curRegNum < MAX_REG_ARG);
17168 // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
17169 if (fieldVarDsc->lvType == TYP_LONG)
17171 // Copy the second register into the next argument register
17173 // If it's a register variable for a TYP_LONG value, then otherReg now should
17174 // hold the second register or it might say that it's in the stack.
17175 if (otherRegNum == REG_STK)
17177 // Apparently when we partially enregister, we allocate stack space for the full
17178 // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
17179 // parameter, to get the high half.
17180 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
17181 nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17182 regTracker.rsTrackRegTrash(curRegNum);
17186 // The other half is in a register.
17187 // Again, it might be the case that it's already in the desired register; if so do
17189 if (curRegNum != otherRegNum)
17191 getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
17192 regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
17197 // Also mark the 2nd arg register as used.
17198 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
17199 // Record the fact that we filled in an extra register slot
17200 filledExtraSlot = true;
17205 // Source is register and Dest is memory (OutgoingArgSpace).
17207 // Now write the srcReg into the right location in the outgoing argument list.
17208 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
17209 compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
17211 if (fieldVarDsc->lvExactSize == 8)
17213 // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
17214 if (fieldVarDsc->lvType == TYP_LONG)
17216 if (fieldVarDsc->lvOtherReg == REG_STK)
17218 // Source is stack.
17219 if (regTmp == REG_STK)
17221 regTmp = regSet.rsPickFreeReg();
17223 // Apparently if we partially enregister, we allocate stack space for the full
17224 // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
17225 // parameter, to get the high half.
17226 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
17227 nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17228 regTracker.rsTrackRegTrash(regTmp);
17229 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
17230 compiler->lvaOutgoingArgSpaceVar,
17231 fieldArgOffset + TARGET_POINTER_SIZE);
17235 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
17236 compiler->lvaOutgoingArgSpaceVar,
17237 fieldArgOffset + TARGET_POINTER_SIZE);
17240 // Record the fact that we filled in an extra register slot
17241 filledExtraSlot = true;
17244 assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
17245 // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
17246 if (arg->gtFlags & GTF_VAR_DEATH)
17248 *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
17249 // We don't bother with the second reg of a register pair, since if it has one,
17250 // it obviously doesn't hold a pointer.
17255 // Source is in memory.
17257 if (curRegNum != MAX_REG_ARG)
17260 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
17261 nextPromotedStructFieldVar, 0);
17262 regTracker.rsTrackRegTrash(curRegNum);
17264 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17266 if (fieldVarDsc->lvExactSize == 8)
17268 noway_assert(fieldSize == EA_4BYTE);
17269 curRegNum = genRegArgNext(curRegNum);
17270 noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
17271 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
17272 nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17273 regTracker.rsTrackRegTrash(curRegNum);
17274 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17275 // Record the fact that we filled in an extra stack slot
17276 filledExtraSlot = true;
17282 if (regTmp == REG_STK)
17284 regTmp = regSet.rsPickFreeReg();
17286 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
17287 nextPromotedStructFieldVar, 0);
17289 // Now write regTmp into the right location in the outgoing argument list.
17290 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
17291 compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
17292 // We overwrote "regTmp", so erase any previous value we recorded that it contained.
17293 regTracker.rsTrackRegTrash(regTmp);
17295 if (fieldVarDsc->lvExactSize == 8)
17297 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
17298 nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
17300 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
17301 compiler->lvaOutgoingArgSpaceVar,
17302 fieldArgOffset + TARGET_POINTER_SIZE);
17303 // Record the fact that we filled in an extra stack slot
17304 filledExtraSlot = true;
17309 // Bump up the following if we filled in an extra slot
17310 if (filledExtraSlot)
17311 bytesOfNextSlotOfCurPromotedStruct += 4;
17313 // Go to the next field.
17314 nextPromotedStructFieldVar++;
17315 if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17317 fieldVarDsc = NULL;
17321 // The next field should have the same parent variable, and we should have put the field vars in order
17322 // sorted by offset.
17323 assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
17324 fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
17325 fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
17326 fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17328 bytesOfNextSlotOfCurPromotedStruct += 4;
// Non-HFA case 2: the current slot is shared by more than one (sub-word) field.
17330 else // oneFieldFillsSlotFromStart == false
17332 // The current slot should contain more than one field.
17333 // We'll construct a word in memory for the slot, then load it into a register.
17334 // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
17335 // slot, in which case we'll just skip this loop altogether.)
17336 while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
17338 // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
17339 // whose fields have their natural alignment, and alignment == size on ARM).
17340 noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
17342 // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
17343 int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
17344 noway_assert(argOffset == INT32_MAX ||
17345 (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
17347 if (fieldVarDsc->lvRegister)
17349 if (curRegNum != MAX_REG_ARG)
17351 noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17353 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
17354 compiler->lvaPromotedStructAssemblyScratchVar,
17355 fieldVarDsc->lvFldOffset % 4);
17359 // Dest is stack; write directly.
17360 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
17361 compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
17366 // Source is in memory.
17368 // Make sure we have a temporary register to use...
17369 if (regTmp == REG_STK)
17371 regTmp = regSet.rsPickFreeReg();
17373 getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
17374 nextPromotedStructFieldVar, 0);
17375 regTracker.rsTrackRegTrash(regTmp);
17377 if (curRegNum != MAX_REG_ARG)
17379 noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17381 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
17382 compiler->lvaPromotedStructAssemblyScratchVar,
17383 fieldVarDsc->lvFldOffset % 4);
17387 getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
17388 compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
17391 // Go to the next field.
17392 nextPromotedStructFieldVar++;
17393 if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
17395 fieldVarDsc = NULL;
17399 // The next field should have the same parent variable, and we should have put the field vars in
17400 // order sorted by offset.
17401 noway_assert(fieldVarDsc->lvIsStructField &&
17402 compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
17403 fieldVarDsc->lvParentLcl ==
17404 compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
17405 fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
17406 fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
17409 // Now, if we were accumulating into the first scratch word of the outgoing argument space in order to
17410 // write to an argument register, do so.
17411 if (curRegNum != MAX_REG_ARG)
17413 noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
17415 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
17416 compiler->lvaPromotedStructAssemblyScratchVar, 0);
17417 regTracker.rsTrackRegTrash(curRegNum);
17418 regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
17420 // We've finished a slot; set the goal of the next slot.
17421 bytesOfNextSlotOfCurPromotedStruct += 4;
// Publish the advanced cursor state back through the in/out parameters so the
// caller's per-slot loop can continue where this call left off.
17425 // Write back the updates.
17426 *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
17427 *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
17428 *pCurRegNum = curRegNum;
17431 return filledExtraSlot;
17433 #endif // _TARGET_ARM_
//------------------------------------------------------------------------
// genFindDeadFieldRegs: For a copy-block node ('cpBlk' must satisfy
// OperIsCopyBlkOp), find the RHS; if it is (possibly through an indirection
// of a GT_ADDR) a promoted struct local whose use here is a last use
// (GTF_VAR_DEATH on the RHS), return the mask of registers occupied by its
// enregistered field variables, so the caller can stop treating those
// registers as holding GC pointers. Returns the accumulated register mask.
17435 regMaskTP CodeGen::genFindDeadFieldRegs(GenTree* cpBlk)
17437 noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
17438 GenTree* rhs = cpBlk->gtOp.gtOp1;
// Peel off an indirection wrapper: [GT_ADDR(lcl)] means the source is the
// local itself.
17440 if (rhs->OperIsIndir())
17442 GenTree* addr = rhs->AsIndir()->Addr();
17443 if (addr->gtOper == GT_ADDR)
17445 rhs = addr->gtOp.gtOp1;
17448 if (rhs->OperGet() == GT_LCL_VAR)
17450 LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
17451 if (rhsDsc->lvPromoted)
17453 // It is promoted; iterate over its field vars.
17454 unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
17455 for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
17457 LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
17458 // Did the variable go dead, and is it enregistered?
// Note: the death flag is checked on the parent local's use ('rhs'), not on
// the individual field var.
17459 if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
17461 // Add the register number to the set of registers holding field vars that are going dead.
17462 res |= genRegMask(fieldVarDsc->lvRegNum);
17470 void CodeGen::SetupLateArgs(GenTreeCall* call)
17472 GenTreeArgList* lateArgs;
17475 /* Generate the code to move the late arguments into registers */
17477 for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
17479 curr = lateArgs->Current();
17482 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
17483 assert(curArgTabEntry);
17484 regNumber regNum = curArgTabEntry->regNum;
17485 unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
17487 assert(isRegParamType(curr->TypeGet()));
17488 assert(curr->gtType != TYP_VOID);
17490 /* If the register is already marked as used, it will become
17491 multi-used. However, since it is a callee-trashed register,
17492 we will have to spill it before the call anyway. So do it now */
17495 // Remember which registers hold pointers. We will spill
17496 // them, but the code that follows will fetch reg vars from
17497 // the registers, so we need that gc compiler->info.
17498 // Also regSet.rsSpillReg doesn't like to spill enregistered
17499 // variables, but if this is their last use that is *exactly*
17500 // what we need to do, so we have to temporarily pretend
17501 // they are no longer live.
17502 // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
17503 // when their last use is about to occur?
17504 // It is because this is the second operand to be evaluated
17505 // of some parent binary op, and the first operand is
17506 // live across this tree, and thought it could re-use the
17507 // variables register (like a GT_REG_VAR). This probably
17508 // is caused by RegAlloc assuming the first operand would
17509 // evaluate into another register.
17510 regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
17511 regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
17512 regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
17513 regSet.RemoveMaskVars(rsTemp);
17515 regNumber regNum2 = regNum;
17516 for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
17518 if (regSet.rsMaskUsed & genRegMask(regNum2))
17520 assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
17521 regSet.rsSpillReg(regNum2);
17523 regNum2 = genRegArgNext(regNum2);
17524 assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
17527 // Restore gc tracking masks.
17528 gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
17529 gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
17531 // Set maskvars back to normal
17532 regSet.AddMaskVars(rsTemp);
17535 /* Evaluate the argument to a register */
17537 /* Check if this is the guess area for the resolve interface call
17538 * Pass a size of EA_OFFSET*/
17539 if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
17541 getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
17542 regTracker.rsTrackRegTrash(regNum);
17544 /* The value is now in the appropriate register */
17546 genMarkTreeInReg(curr, regNum);
17548 regSet.rsMarkRegUsed(curr);
17550 #ifdef _TARGET_ARM_
17551 else if (curr->gtType == TYP_STRUCT)
17553 GenTree* arg = curr;
17554 while (arg->gtOper == GT_COMMA)
17556 GenTree* op1 = arg->gtOp.gtOp1;
17557 genEvalSideEffects(op1);
17558 genUpdateLife(op1);
17559 arg = arg->gtOp.gtOp2;
17561 noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
17562 (arg->OperGet() == GT_MKREFANY));
17564 // This code passes a TYP_STRUCT by value using
17565 // the argument registers first and
17566 // then the lvaOutgoingArgSpaceVar area.
17569 // We prefer to choose low registers here to reduce code bloat
17570 regMaskTP regNeedMask = RBM_LOW_REGS;
17571 unsigned firstStackSlot = 0;
17572 unsigned argAlign = TARGET_POINTER_SIZE;
17573 size_t originalSize = InferStructOpSizeAlign(arg, &argAlign);
17575 unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
17578 if (regNum == REG_STK)
17580 firstStackSlot = 0;
17584 if (argAlign == (TARGET_POINTER_SIZE * 2))
17586 assert((regNum & 1) == 0);
17589 // firstStackSlot is an index of the first slot of the struct
17590 // that is on the stack, in the range [0,slots]. If it is 'slots',
17591 // then the entire struct is in registers. It is also equal to
17592 // the number of slots of the struct that are passed in registers.
17594 if (curArgTabEntry->isHfaRegArg)
17596 // HFA arguments that have been decided to go into registers fit the reg space.
17597 assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
17598 assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
17599 "HFA argument doesn't fit entirely in FP argument registers");
17600 firstStackSlot = slots;
17602 else if (regNum + slots > MAX_REG_ARG)
17604 firstStackSlot = MAX_REG_ARG - regNum;
17605 assert(firstStackSlot > 0);
17609 firstStackSlot = slots;
17612 if (curArgTabEntry->isHfaRegArg)
17614 // Mask out the registers used by an HFA arg from the ones used to compute tree into.
17615 for (unsigned i = regNum; i < regNum + slots; i++)
17617 regNeedMask &= ~genRegMask(regNumber(i));
17622 // This holds the set of registers corresponding to enregistered promoted struct field variables
17623 // that go dead after this use of the variable in the argument list.
17624 regMaskTP deadFieldVarRegs = RBM_NONE;
17626 // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
17627 // INDEPENDENT fashion, which doesn't require writes to be written through to the variables
17628 // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
17629 // table entry for the promoted struct local. As we fill slots with the contents of a
17630 // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
17631 // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
17632 // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
17633 // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
17635 LclVarDsc* promotedStructLocalVarDesc = NULL;
17636 unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
17637 unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
17638 GenTree* structLocalTree = NULL;
17640 BYTE* gcLayout = NULL;
17641 regNumber regSrc = REG_NA;
17642 if (arg->gtOper == GT_OBJ)
17644 // Are we loading a promoted struct local var?
17645 if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
17647 structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
17648 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
17649 LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
17651 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
17653 if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
17657 // Fix 388395 ARM JitStress WP7
17658 noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
17660 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
17661 promotedStructLocalVarDesc = varDsc;
17662 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17666 if (promotedStructLocalVarDesc == NULL)
17668 // If it's not a promoted struct variable, set "regSrc" to the address
17669 // of the struct local.
17670 genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
17671 noway_assert(arg->gtObj.gtOp1->InReg());
17672 regSrc = arg->gtObj.gtOp1->gtRegNum;
17673 // Remove this register from the set of registers that we pick from, unless slots equals 1
17675 regNeedMask &= ~genRegMask(regSrc);
17678 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
17679 compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
17681 else if (arg->gtOper == GT_LCL_VAR)
17683 // Move the address of the LCL_VAR in arg into reg
17685 unsigned varNum = arg->gtLclVarCommon.gtLclNum;
17687 // Are we loading a promoted struct local var?
17688 structLocalTree = arg;
17689 unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
17690 LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
17692 noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
17694 Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
17696 if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
17697 // guaranteed to live
17700 assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
17701 promotedStructLocalVarDesc = varDsc;
17702 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17705 if (promotedStructLocalVarDesc == NULL)
17707 regSrc = regSet.rsPickFreeReg(regNeedMask);
17708 // Remove this register from the set of registers that we pick from, unless slots equals 1
17710 regNeedMask &= ~genRegMask(regSrc);
17712 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
17713 regTracker.rsTrackRegTrash(regSrc);
17715 if (varDsc->lvExactSize >= TARGET_POINTER_SIZE)
17717 gcLayout = compiler->lvaGetGcLayout(varNum);
17721 gcLayout = new (compiler, CMK_Codegen) BYTE[1];
17722 gcLayout[0] = TYPE_GC_NONE;
17726 else if (arg->gtOper == GT_MKREFANY)
17728 assert(slots == 2);
17729 assert((firstStackSlot == 1) || (firstStackSlot == 2));
17730 assert(argOffset == 0); // ???
17731 PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
17733 // Adjust argOffset if part of this guy was pushed onto the stack
17734 if (firstStackSlot < slots)
17736 argOffset += TARGET_POINTER_SIZE;
17739 // Skip the copy loop below because we have already placed the argument in the right place
17745 assert(!"Unsupported TYP_STRUCT arg kind");
17746 gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
17749 if (promotedStructLocalVarDesc != NULL)
17751 // We must do the stack parts first, since those might need values
17752 // from argument registers that will be overwritten in the portion of the
17753 // loop that writes into the argument registers.
17754 bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
17755 // Now find the var number of the first that starts in the first stack slot.
17756 unsigned fieldVarLim =
17757 promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
17758 while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
17759 (firstStackSlot * TARGET_POINTER_SIZE) &&
17760 nextPromotedStructFieldVar < fieldVarLim)
17762 nextPromotedStructFieldVar++;
17764 // If we reach the limit, meaning there is no field that goes even partly in the stack, only if the
17765 // first stack slot is after the last slot.
17766 assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
17769 if (slots > 0) // the mkref case may have set "slots" to zero.
17771 // First pass the stack portion of the struct (if any)
17773 int argOffsetOfFirstStackSlot = argOffset;
17774 for (unsigned i = firstStackSlot; i < slots; i++)
17776 emitAttr fieldSize;
17777 if (gcLayout[i] == TYPE_GC_NONE)
17778 fieldSize = EA_PTRSIZE;
17779 else if (gcLayout[i] == TYPE_GC_REF)
17780 fieldSize = EA_GCREF;
17783 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
17784 fieldSize = EA_BYREF;
17787 regNumber maxRegArg = regNumber(MAX_REG_ARG);
17788 if (promotedStructLocalVarDesc != NULL)
17790 regNumber regTmp = REG_STK;
17792 bool filledExtraSlot =
17793 genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
17794 &nextPromotedStructFieldVar,
17795 &bytesOfNextSlotOfCurPromotedStruct,
17796 /*pCurRegNum*/ &maxRegArg, argOffset,
17797 /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
17798 TARGET_POINTER_SIZE,
17799 argOffsetOfFirstStackSlot, &deadFieldVarRegs, ®Tmp);
17800 if (filledExtraSlot)
17803 argOffset += TARGET_POINTER_SIZE;
17806 else // (promotedStructLocalVarDesc == NULL)
17808 // when slots > 1, we perform multiple load/stores thus regTmp cannot be equal to regSrc
17809 // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
17810 // to regSet.rsPickFreeReg, so we need to be a little more forceful.
17811 // Otherwise, just re-use the same register.
17813 regNumber regTmp = regSrc;
17816 regMaskTP regSrcUsed;
17817 regSet.rsLockReg(genRegMask(regSrc), ®SrcUsed);
17819 regTmp = regSet.rsPickFreeReg(regNeedMask);
17821 noway_assert(regTmp != regSrc);
17823 regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
17826 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
17827 i * TARGET_POINTER_SIZE);
17829 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
17830 compiler->lvaOutgoingArgSpaceVar, argOffset);
17831 regTracker.rsTrackRegTrash(regTmp);
17833 argOffset += TARGET_POINTER_SIZE;
17836 // Now pass the register portion of the struct
17839 bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
17840 if (promotedStructLocalVarDesc != NULL)
17841 nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
17843 // Create a nested loop here so that the first time thru the loop
17844 // we setup all of the regArg registers except for possibly
17845 // the one that would overwrite regSrc. Then in the final loop
17846 // (if necessary) we just setup regArg/regSrc with the overwrite
17848 bool overwriteRegSrc = false;
17849 bool needOverwriteRegSrc = false;
17852 if (needOverwriteRegSrc)
17853 overwriteRegSrc = true;
17855 for (unsigned i = 0; i < firstStackSlot; i++)
17857 regNumber regArg = (regNumber)(regNum + i);
17859 if (overwriteRegSrc == false)
17861 if (regArg == regSrc)
17863 needOverwriteRegSrc = true;
17869 if (regArg != regSrc)
17873 emitAttr fieldSize;
17874 if (gcLayout[i] == TYPE_GC_NONE)
17875 fieldSize = EA_PTRSIZE;
17876 else if (gcLayout[i] == TYPE_GC_REF)
17877 fieldSize = EA_GCREF;
17880 noway_assert(gcLayout[i] == TYPE_GC_BYREF);
17881 fieldSize = EA_BYREF;
17884 regNumber regTmp = REG_STK;
17885 if (promotedStructLocalVarDesc != NULL)
17887 bool filledExtraSlot =
17888 genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
17889 fieldSize, &nextPromotedStructFieldVar,
17890 &bytesOfNextSlotOfCurPromotedStruct,
17891 /*pCurRegNum*/ ®Arg,
17892 /*argOffset*/ INT32_MAX,
17893 /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
17894 /*argOffsetOfFirstStackSlot*/ INT32_MAX,
17895 &deadFieldVarRegs, ®Tmp);
17896 if (filledExtraSlot)
17901 getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
17902 fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
17904 regTracker.rsTrackRegTrash(regArg);
17906 } while (needOverwriteRegSrc != overwriteRegSrc);
17909 if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
17911 regSet.rsMarkRegFree(genRegMask(regSrc));
17914 if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
17918 for (unsigned i = 1; i < firstStackSlot; i++)
17920 arg->gtRegNum = (regNumber)(regNum + i);
17921 curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
17923 arg->gtRegNum = regNum;
17924 curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
17927 // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
17928 // so update liveness.
17929 genUpdateLife(arg);
17931 // Now, if some copied field locals were enregistered, and they're now dead, update the set of
17932 // register holding gc pointers.
17933 if (deadFieldVarRegs != RBM_NONE)
17934 gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
17936 else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
17938 if (curArgTabEntry->regNum == REG_STK)
17940 // The arg is passed in the outgoing argument area of the stack frame
17941 genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
17942 assert(curr->InReg()); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
17944 inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
17945 inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
17949 assert(regNum < REG_ARG_LAST);
17950 regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
17951 genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
17952 assert(curr->gtRegPair == regPair);
17953 regSet.rsMarkRegPairUsed(curr);
17956 #endif // _TARGET_ARM_
17957 else if (curArgTabEntry->regNum == REG_STK)
17959 // The arg is passed in the outgoing argument area of the stack frame
17961 genCodeForTree(curr, 0);
17962 assert(curr->InReg()); // should be enregistered after genCodeForTree(curr, 0)
17964 inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
17966 if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
17967 gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
17971 if (!varTypeIsFloating(curr->gtType))
17973 genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
17974 assert(curr->gtRegNum == regNum);
17975 regSet.rsMarkRegUsed(curr);
17977 else // varTypeIsFloating(curr->gtType)
17979 if (genIsValidFloatReg(regNum))
17981 genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
17983 assert(curr->gtRegNum == regNum);
17984 regSet.rsMarkRegUsed(curr);
17988 genCodeForTree(curr, 0);
17989 // If we are loading a floating point type into integer registers
17990 // then it must be for varargs.
17991 // genCodeForTree will load it into a floating point register,
17992 // now copy it into the correct integer register(s)
17993 if (curr->TypeGet() == TYP_FLOAT)
17995 assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
17996 regSet.rsSpillRegIfUsed(regNum);
17997 #ifdef _TARGET_ARM_
17998 getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
18000 #error "Unsupported target"
18002 regTracker.rsTrackRegTrash(regNum);
18004 curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register
18005 curr->gtRegNum = regNum;
18006 regSet.rsMarkRegUsed(curr);
18010 assert(curr->TypeGet() == TYP_DOUBLE);
18011 regNumber intRegNumLo = regNum;
18012 curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
18013 #ifdef _TARGET_ARM_
18014 regNumber intRegNumHi = regNumber(intRegNumLo + 1);
18015 assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
18016 assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
18017 regSet.rsSpillRegIfUsed(intRegNumHi);
18018 regSet.rsSpillRegIfUsed(intRegNumLo);
18020 getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
18021 regTracker.rsTrackRegTrash(intRegNumLo);
18022 regTracker.rsTrackRegTrash(intRegNumHi);
18023 curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
18024 regSet.rsMarkRegPairUsed(curr);
18026 #error "Unsupported target"
18034 /* If any of the previously loaded arguments were spilled - reload them */
18036 for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
18038 curr = lateArgs->Current();
18041 if (curr->gtFlags & GTF_SPILLED)
18043 if (isRegPairType(curr->gtType))
18045 regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
18049 regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
18055 #ifdef _TARGET_ARM_
18057 // 'Push' a single GT_MKREFANY argument onto a call's argument list
18058 // The argument is passed as described by the fgArgTabEntry
18059 // If any part of the struct is to be passed in a register the
18060 // regNum value will be equal to the register used to pass
18061 // the first part of the struct.
18062 // If any part is to go onto the stack, we first generate the
18063 // value into a register specified by 'regNeedMask' and
18064 // then store it to the outgoing argument area.
18065 // When this method returns, both parts of the TypeReference have
18066 // been pushed onto the stack, but *no* registers have been marked
18067 // as 'in-use', that is the responsibility of the caller.
18069 void CodeGen::PushMkRefAnyArg(GenTree* mkRefAnyTree, fgArgTabEntry* curArgTabEntry, regMaskTP regNeedMask)
18071 regNumber regNum = curArgTabEntry->regNum;
18073 assert(mkRefAnyTree->gtOper == GT_MKREFANY);
18074 regMaskTP arg1RegMask = 0;
18075 int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
18077 // Construct the TypedReference directly into the argument list of the call by
18078 // 'pushing' the first field of the typed reference: the pointer.
18079 // Do this by directly generating it into the argument register or outgoing arg area of the stack.
18080 // Mark it as used so we don't trash it while generating the second field.
18082 if (regNum == REG_STK)
18084 genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18085 noway_assert(mkRefAnyTree->gtOp.gtOp1->InReg());
18086 regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
18087 inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
18088 regTracker.rsTrackRegTrash(tmpReg1);
18089 argOffset += TARGET_POINTER_SIZE;
18094 assert(regNum <= REG_ARG_LAST);
18095 arg1RegMask = genRegMask(regNum);
18096 genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
18097 regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
18100 // Now 'push' the second field of the typed reference: the method table.
18101 if (regNum2 == REG_STK)
18103 genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
18104 noway_assert(mkRefAnyTree->gtOp.gtOp2->InReg());
18105 regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
18106 inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
18107 regTracker.rsTrackRegTrash(tmpReg2);
18111 assert(regNum2 <= REG_ARG_LAST);
18112 // We don't have to mark this register as being in use here because it will
18113 // be done by the caller, and we don't want to double-count it.
18114 genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
18117 // Now that we are done generating the second part of the TypeReference, we can mark
18118 // the first register as free.
18119 // The caller in the shared path we will re-mark all registers used by this argument
18120 // as being used, so we don't want to double-count this one.
18121 if (arg1RegMask != 0)
18123 GenTree* op1 = mkRefAnyTree->gtOp.gtOp1;
18124 if (op1->gtFlags & GTF_SPILLED)
18126 /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
18128 regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
18132 regSet.rsMarkRegFree(arg1RegMask);
18136 #endif // _TARGET_ARM_
18138 #endif // FEATURE_FIXED_OUT_ARGS
18140 regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreeCall* call)
18142 assert((gtCallTypes)call->gtCallType == CT_INDIRECT);
18144 regMaskTP fptrRegs;
18146 /* Loading the indirect call target might cause one or more of the previously
18147 loaded argument registers to be spilled. So, we save information about all
18148 the argument registers, and unspill any of them that get spilled, after
18149 the call target is loaded.
18158 } regArgTab[MAX_REG_ARG];
18160 /* Record the previously loaded arguments, if any */
18163 regMaskTP prefRegs = regSet.rsRegMaskFree();
18164 regMaskTP argRegs = RBM_NONE;
18165 for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18168 regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
18169 GenTree* argTree = regSet.rsUsedTree[regNum];
18170 regArgTab[regIndex].node = argTree;
18171 if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
18173 assert(argTree->InReg());
18174 if (isRegPairType(argTree->gtType))
18176 regPairNo regPair = argTree->gtRegPair;
18177 assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
18178 regArgTab[regIndex].regPair = regPair;
18179 mask = genRegPairMask(regPair);
18183 assert(regNum == argTree->gtRegNum);
18184 regArgTab[regIndex].regNum = regNum;
18185 mask = genRegMask(regNum);
18187 assert(!(prefRegs & mask));
18192 /* Record the register(s) used for the indirect call func ptr */
18193 fptrRegs = genMakeRvalueAddressable(call->gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
18195 /* If any of the previously loaded arguments were spilled, reload them */
18197 for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
18199 GenTree* argTree = regArgTab[regIndex].node;
18200 if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
18202 assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
18203 if (isRegPairType(argTree->gtType))
18205 regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
18209 regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
18214 /* Make sure the target is still addressable while avoiding the argument registers */
18216 fptrRegs = genKeepAddressable(call->gtCallAddr, fptrRegs, argRegs);
18221 /*****************************************************************************
18223 * Generate code for a call. If the call returns a value in register(s), the
18224 * register mask that describes where the result will be found is returned;
18225 * otherwise, RBM_NONE is returned.
18229 #pragma warning(push)
18230 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
18232 regMaskTP CodeGen::genCodeForCall(GenTreeCall* call, bool valUsed)
18238 emitter::EmitCallType emitCallType;
18240 unsigned saveStackLvl;
18242 BasicBlock* returnLabel = DUMMY_INIT(NULL);
18243 LclVarDsc* frameListRoot = NULL;
18245 unsigned savCurIntArgReg;
18246 unsigned savCurFloatArgReg;
18250 regMaskTP fptrRegs = RBM_NONE;
18251 regMaskTP vptrMask = RBM_NONE;
18254 unsigned stackLvl = getEmitter()->emitCurStackLvl;
18256 if (compiler->verbose)
18258 printf("\t\t\t\t\t\t\tBeg call ");
18259 Compiler::printTreeID(call);
18260 printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
18264 #ifdef _TARGET_ARM_
18265 if (compiler->opts.ShouldUsePInvokeHelpers() && (call->gtFlags & GTF_CALL_UNMANAGED) && !call->IsVirtual())
18267 (void)genPInvokeCallProlog(nullptr, 0, (CORINFO_METHOD_HANDLE) nullptr, nullptr);
18271 gtCallTypes callType = (gtCallTypes)call->gtCallType;
18272 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
18274 CORINFO_SIG_INFO* sigInfo = nullptr;
18276 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
18278 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
18281 /* Make some sanity checks on the call node */
18283 // "this" only makes sense for user functions
18284 noway_assert(call->gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
18285 // tailcalls won't be done for helpers, caller-pop args, and check that
18286 // the global flag is set
18287 noway_assert(!call->IsTailCall() ||
18288 (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
18291 // Pass the call signature information down into the emitter so the emitter can associate
18292 // native call sites with the signatures they were generated from.
18293 if (callType != CT_HELPER)
18295 sigInfo = call->callSig;
18299 unsigned pseudoStackLvl = 0;
18301 if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
18303 noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
18305 pseudoStackLvl = genStackLevel;
18307 noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
18308 "so fgAddCodeRef() should have set isFramePointerRequired()");
18311 /* Mark the current stack level and list of pointer arguments */
18313 saveStackLvl = genStackLevel;
18315 /*-------------------------------------------------------------------------
18316 * Set up the registers and arguments
18319 /* We'll keep track of how much we've pushed on the stack */
18323 /* We need to get a label for the return address with the proper stack depth. */
18324 /* For the callee pops case (the default) that is before the args are pushed. */
18326 if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
18328 returnLabel = genCreateTempLabel();
18332 Make sure to save the current argument register status
18333 in case we have nested calls.
18336 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
18337 savCurIntArgReg = intRegState.rsCurRegArgNum;
18338 savCurFloatArgReg = floatRegState.rsCurRegArgNum;
18339 intRegState.rsCurRegArgNum = 0;
18340 floatRegState.rsCurRegArgNum = 0;
18342 /* Pass the arguments */
18344 if ((call->gtCallObjp != NULL) || (call->gtCallArgs != NULL))
18346 argSize += genPushArgList(call);
18349 /* We need to get a label for the return address with the proper stack depth. */
18350 /* For the caller pops case (cdecl) that is after the args are pushed. */
18352 if (call->gtFlags & GTF_CALL_UNMANAGED)
18354 if (call->gtFlags & GTF_CALL_POP_ARGS)
18355 returnLabel = genCreateTempLabel();
18357 /* Make sure that we now have a label */
18358 noway_assert(returnLabel != DUMMY_INIT(NULL));
18361 if (callType == CT_INDIRECT)
18363 fptrRegs = genLoadIndirectCallTarget(call);
18366 /* Make sure any callee-trashed registers are saved */
18368 regMaskTP calleeTrashedRegs = RBM_NONE;
18370 #if GTF_CALL_REG_SAVE
18371 if (call->gtFlags & GTF_CALL_REG_SAVE)
18373 /* The return value reg(s) will definitely be trashed */
18375 switch (call->gtType)
18380 #if !CPU_HAS_FP_SUPPORT
18383 calleeTrashedRegs = RBM_INTRET;
18387 #if !CPU_HAS_FP_SUPPORT
18390 calleeTrashedRegs = RBM_LNGRET;
18394 #if CPU_HAS_FP_SUPPORT
18398 calleeTrashedRegs = 0;
18402 noway_assert(!"unhandled/unexpected type");
18408 calleeTrashedRegs = RBM_CALLEE_TRASH;
18411 /* Spill any callee-saved registers which are being used */
18413 regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
18415 /* We need to save all GC registers to the InlinedCallFrame.
18416 Instead, just spill them to temps. */
18418 if (call->gtFlags & GTF_CALL_UNMANAGED)
18419 spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
18421 // Ignore fptrRegs as it is needed only to perform the indirect call
18423 spillRegs &= ~fptrRegs;
18425 /* Do not spill the argument registers.
18426 Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
18428 noway_assert((regSet.rsMaskMult & call->gtCallRegUsedMask) == 0);
18429 spillRegs &= ~call->gtCallRegUsedMask;
18433 regSet.rsSpillRegs(spillRegs);
18436 #if FEATURE_STACK_FP_X87
18438 SpillForCallStackFP();
18440 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
18443 regNumber regReturn = regSet.PickRegFloat();
18445 // Assign reg to tree
18446 genMarkTreeInReg(call, regReturn);
18449 regSet.SetUsedRegFloat(call, true);
18452 compCurFPState.Push(regReturn);
18455 SpillForCallRegisterFP(call->gtCallRegUsedMask);
18458 /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
18460 retSize = EA_PTRSIZE;
18464 if (call->gtType == TYP_REF)
18466 retSize = EA_GCREF;
18468 else if (call->gtType == TYP_BYREF)
18470 retSize = EA_BYREF;
18474 /*-------------------------------------------------------------------------
18475 * For caller-pop calls, the GC info will report the arguments as pending
18476 arguments as the caller explicitly pops them. Also should be
18477 reported as non-GC arguments as they effectively go dead at the
18478 call site (callee owns them)
18481 args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
18483 #ifdef PROFILING_SUPPORTED
18485 /*-------------------------------------------------------------------------
18486 * Generate the profiling hooks for the call
18489 /* Treat special cases first */
18491 /* fire the event at the call site */
18492 /* alas, right now I can only handle calls via a method handle */
18493 if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->IsTailCall())
18495 unsigned saveStackLvl2 = genStackLevel;
18498 // Push the profilerHandle
18500 CLANG_FORMAT_COMMENT_ANCHOR;
18502 #ifdef _TARGET_X86_
18503 regMaskTP byrefPushedRegs;
18504 regMaskTP norefPushedRegs;
18505 regMaskTP pushedArgRegs = genPushRegs(call->gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
18507 if (compiler->compProfilerMethHndIndirected)
18509 getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
18510 (ssize_t)compiler->compProfilerMethHnd);
18514 inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
18518 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
18519 sizeof(int) * 1, // argSize
18520 EA_UNKNOWN); // retSize
18523 // Adjust the number of stack slots used by this managed method if necessary.
18525 if (compiler->fgPtrArgCntMax < 1)
18527 JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
18528 compiler->fgPtrArgCntMax = 1;
18531 genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
18533 // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
18534 // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail prefixed calls.
18535 // Here we grab a register to temporarily store r0 and revert it back after we have emitted callback.
18537 // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want
18538 // to disturb them and hence argument registers are locked here.
18539 regMaskTP usedMask = RBM_NONE;
18540 regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
18542 regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
18543 regSet.rsLockReg(genRegMask(scratchReg));
18545 emitAttr attr = EA_UNKNOWN;
18546 if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
18549 gcInfo.gcMarkRegSetGCref(scratchReg);
18551 else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
18554 gcInfo.gcMarkRegSetByref(scratchReg);
18561 getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
18562 regTracker.rsTrackRegTrash(scratchReg);
18564 if (compiler->compProfilerMethHndIndirected)
18566 getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
18567 regTracker.rsTrackRegTrash(REG_R0);
18571 instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
18574 genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
18576 EA_UNKNOWN); // retSize
18578 // Restore back to the state that existed before profiler callback
18579 gcInfo.gcMarkRegSetNpt(scratchReg);
18580 getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
18581 regTracker.rsTrackRegTrash(REG_R0);
18582 regSet.rsUnlockReg(genRegMask(scratchReg));
18583 regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
18585 NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
18586 #endif //_TARGET_X86_
18588 /* Restore the stack level */
18589 SetStackLevel(saveStackLvl2);
18592 #endif // PROFILING_SUPPORTED
18595 /*-------------------------------------------------------------------------
18596 * Generate an ESP check for the call
18599 if (compiler->opts.compStackCheckOnCall
18600 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
18601 // check the stacks as frequently as possible
18602 && !call->IsHelperCall()
18604 && call->gtCallType == CT_USER_FUNC
18608 noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
18609 compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
18610 compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
18611 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
18615 /*-------------------------------------------------------------------------
18616 * Generate the call
18619 bool fPossibleSyncHelperCall = false;
18620 CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
18622 bool fTailCallTargetIsVSD = false;
18624 bool fTailCall = (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
18626 /* Check for Delegate.Invoke. If so, we inline it. We get the
18627 target-object and target-function from the delegate-object, and do
18631 if ((call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
18633 noway_assert(call->gtCallType == CT_USER_FUNC);
18635 assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
18636 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
18637 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
18639 /* Find the offsets of the 'this' pointer and new target */
18641 CORINFO_EE_INFO* pInfo;
18642 unsigned instOffs; // offset of new 'this' pointer
18643 unsigned firstTgtOffs; // offset of first target to invoke
18644 const regNumber regThis = genGetThisArgReg(call);
18646 pInfo = compiler->eeGetEEInfo();
18647 instOffs = pInfo->offsetOfDelegateInstance;
18648 firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
18650 #ifdef _TARGET_ARM_
18651 // Ensure that we don't trash any of these registers if we have to load
18652 // the helper call target into a register to invoke it.
18653 regMaskTP regsUsed = 0;
18655 if ((call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
18657 getEmitter()->emitIns_R_R_I(INS_add, EA_BYREF, compiler->virtualStubParamInfo->GetReg(), regThis,
18658 pInfo->offsetOfSecureDelegateIndirectCell);
18659 regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18661 // Ensure that the virtual stub param info register doesn't get reused before the call is taken
18662 regSet.rsLockReg(compiler->virtualStubParamInfo->GetRegMask(), &regsUsed);
18665 #endif // _TARGET_ARM_
18667 // Grab an available register to use for the CALL indirection
18668 regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
18670 // Save the invoke-target-function in indCallReg
18671 // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
18672 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
18673 regTracker.rsTrackRegTrash(indCallReg);
18675 /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
18677 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
18678 regTracker.rsTrackRegTrash(regThis);
18679 noway_assert(instOffs < 127);
18681 /* Call through indCallReg */
18683 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
18685 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18686 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18687 gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
18689 #ifdef _TARGET_ARM_
18690 if ((call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
18692 regSet.rsUnlockReg(compiler->virtualStubParamInfo->GetRegMask(), regsUsed);
18694 #endif // _TARGET_ARM_
18698 /*-------------------------------------------------------------------------
18699 * Virtual and interface calls
18702 switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
18704 case GTF_CALL_VIRT_STUB:
18706 regSet.rsSetRegsModified(compiler->virtualStubParamInfo->GetRegMask());
18708 // An x86 JIT which uses full stub dispatch must generate only
18709 // the following stub dispatch calls:
18711 // (1) isCallRelativeIndirect:
18712 // call dword ptr [rel32] ; FF 15 ---rel32----
18713 // (2) isCallRelative:
18714 // call abc ; E8 ---rel32----
18715 // (3) isCallRegisterIndirect:
18717 // call dword ptr [eax] ; FF 10
18719 // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
18720 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
18723 // Please do not insert any Random NOPs while constructing this VSD call
18725 getEmitter()->emitDisableRandomNops();
18729 // This is code to set up an indirect call to a stub address computed
18730 // via dictionary lookup. However the dispatch stub receivers aren't set up
18731 // to accept such calls at the moment.
18732 if (callType == CT_INDIRECT)
18736 // -------------------------------------------------------------------------
18737 // The importer decided we needed a stub call via a computed
18738 // stub dispatch address, i.e. an address which came from a dictionary lookup.
18739 // - The dictionary lookup produces an indirected address, suitable for call
18740 // via "call [virtualStubParamInfo.reg]"
18742 // This combination will only be generated for shared generic code and when
18743 // stub dispatch is active.
18745 // No need to null check the this pointer - the dispatch code will deal with this.
18747 noway_assert(genStillAddressable(call->gtCallAddr));
18749 // Now put the address in virtualStubParamInfo.reg.
18750 // This is typically a nop when the register used for
18751 // the gtCallAddr is virtualStubParamInfo.reg
18753 inst_RV_TT(INS_mov, compiler->virtualStubParamInfo->GetReg(), call->gtCallAddr);
18754 regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18756 #if defined(_TARGET_X86_)
18757 // Emit enough bytes of nops so that this sequence can be distinguished
18758 // from other virtual stub dispatch calls.
18760 // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
18761 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
18763 getEmitter()->emitIns_Nop(3);
18765 // Make the virtual stub call:
18766 // call [virtualStubParamInfo.reg]
18768 emitCallType = emitter::EC_INDIR_ARD;
18770 indReg = compiler->virtualStubParamInfo->GetReg();
18771 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18773 #elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
18775 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18777 // Make the virtual stub call:
18778 // ldr indReg, [virtualStubParamInfo.reg]
18781 emitCallType = emitter::EC_INDIR_R;
18783 // Now dereference [virtualStubParamInfo.reg] and put it in a new temp register 'indReg'
18785 indReg = regSet.rsGrabReg(RBM_ALLINT & ~compiler->virtualStubParamInfo->GetRegMask());
18786 assert(call->gtCallAddr->InReg());
18787 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg,
18788 compiler->virtualStubParamInfo->GetReg(), 0);
18789 regTracker.rsTrackRegTrash(indReg);
18792 #error "Unknown target for VSD call"
18795 getEmitter()->emitIns_Call(emitCallType,
18797 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18798 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18799 gcInfo.gcRegByrefSetCur, ilOffset, indReg);
18803 // -------------------------------------------------------------------------
18804 // Check for a direct stub call.
18807 // Get stub addr. This will return NULL if virtual call stubs are not active
18808 void* stubAddr = NULL;
18810 stubAddr = (void*)call->gtStubCallStubAddr;
18812 noway_assert(stubAddr != NULL);
18814 // -------------------------------------------------------------------------
18815 // Direct stub calls, though the stubAddr itself may still need to be
18816 // accesed via an indirection.
18819 // No need to null check - the dispatch code will deal with null this.
18821 emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
18822 void* addr = stubAddr;
18824 regNumber callReg = REG_NA;
18826 if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
18828 #if CPU_LOAD_STORE_ARCH
18829 callReg = regSet.rsGrabReg(compiler->virtualStubParamInfo->GetRegMask());
18830 noway_assert(callReg == compiler->virtualStubParamInfo->GetReg());
18832 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, compiler->virtualStubParamInfo->GetReg(),
18833 (ssize_t)stubAddr);
18834 // The stub will write-back to this register, so don't track it
18835 regTracker.rsTrackRegTrash(compiler->virtualStubParamInfo->GetReg());
18837 if (compiler->IsTargetAbi(CORINFO_CORERT_ABI))
18839 indReg = regSet.rsGrabReg(RBM_ALLINT & ~compiler->virtualStubParamInfo->GetRegMask());
18843 indReg = REG_JUMP_THUNK_PARAM;
18845 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg,
18846 compiler->virtualStubParamInfo->GetReg(), 0);
18847 regTracker.rsTrackRegTrash(indReg);
18848 callTypeStubAddr = emitter::EC_INDIR_R;
18849 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
18851 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
18852 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18853 gcInfo.gcRegByrefSetCur, ilOffset, indReg);
18856 // emit an indirect call
18857 callTypeStubAddr = emitter::EC_INDIR_C;
18859 disp = (ssize_t)stubAddr;
18862 #if CPU_LOAD_STORE_ARCH
18863 if (callTypeStubAddr != emitter::EC_INDIR_R)
18866 getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCallMethHnd,
18867 INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
18868 gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
18869 gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
18873 else // tailCall is true
18876 // Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
18877 // moves the this pointer out of it's usual place and into the argument list.
18878 #ifdef _TARGET_X86_
18880 // Generate "cmp ECX, [ECX]" to trap null pointers
18881 const regNumber regThis = genGetThisArgReg(call);
18882 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
18884 #endif // _TARGET_X86_
18886 if (callType == CT_INDIRECT)
18888 noway_assert(genStillAddressable(call->gtCallAddr));
18890 // Now put the address in EAX.
18891 inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
18892 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
18894 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
18898 // importer/EE should guarantee the indirection
18899 noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
18901 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
18902 ssize_t(call->gtStubCallStubAddr));
18905 fTailCallTargetIsVSD = true;
18909 // OK to start inserting random NOPs again
18911 getEmitter()->emitEnableRandomNops();
18915 case GTF_CALL_VIRT_VTABLE:
18916 // stub dispatching is off or this is not a virtual call (could be a tailcall)
18919 regNumber vptrReg1 = REG_NA;
18920 regMaskTP vptrMask1 = RBM_NONE;
18921 unsigned vtabOffsOfIndirection;
18922 unsigned vtabOffsAfterIndirection;
18925 noway_assert(callType == CT_USER_FUNC);
18927 /* Get hold of the vtable offset (note: this might be expensive) */
18929 compiler->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
18930 &vtabOffsAfterIndirection, &isRelative);
18933 regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
18934 vptrMask = genRegMask(vptrReg);
18938 vptrReg1 = regSet.rsGrabReg(RBM_ALLINT & ~vptrMask);
18939 vptrMask1 = genRegMask(vptrReg1);
18942 /* The register no longer holds a live pointer value */
18943 gcInfo.gcMarkRegSetNpt(vptrMask);
18947 gcInfo.gcMarkRegSetNpt(vptrMask1);
18950 // MOV vptrReg, [REG_CALL_THIS + offs]
18951 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
18953 regTracker.rsTrackRegTrash(vptrReg);
18957 regTracker.rsTrackRegTrash(vptrReg1);
18960 noway_assert(vptrMask & ~call->gtCallRegUsedMask);
18962 /* The register no longer holds a live pointer value */
18963 gcInfo.gcMarkRegSetNpt(vptrMask);
18965 /* Get the appropriate vtable chunk */
18967 if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
18971 #if defined(_TARGET_ARM_)
18972 unsigned offset = vtabOffsOfIndirection + vtabOffsAfterIndirection;
18974 // ADD vptrReg1, REG_CALL_IND_SCRATCH, vtabOffsOfIndirection + vtabOffsAfterIndirection
18975 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, vptrReg1, vptrReg, offset);
18981 // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
18982 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
18983 vtabOffsOfIndirection);
18987 assert(!isRelative);
18990 /* Call through the appropriate vtable slot */
18996 #if defined(_TARGET_ARM_)
18997 /* Load the function address: "[vptrReg1 + vptrReg] -> reg_intret" */
18998 getEmitter()->emitIns_R_ARR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg1,
19006 /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
19007 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
19008 vtabOffsAfterIndirection);
19013 #if CPU_LOAD_STORE_ARCH
19016 getEmitter()->emitIns_R_ARR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg1, vptrReg,
19021 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
19022 vtabOffsAfterIndirection);
19025 getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCallMethHnd,
19026 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19027 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19028 gcInfo.gcRegByrefSetCur, ilOffset,
19031 assert(!isRelative);
19032 getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCallMethHnd,
19033 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19034 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19035 gcInfo.gcRegByrefSetCur, ilOffset,
19039 vtabOffsAfterIndirection); // disp
19040 #endif // CPU_LOAD_STORE_ARCH
19045 case GTF_CALL_NONVIRT:
19047 //------------------------ Non-virtual/Indirect calls -------------------------
19048 // Lots of cases follow
19049 // - Direct P/Invoke calls
19050 // - Indirect calls to P/Invoke functions via the P/Invoke stub
19051 // - Direct Helper calls
19052 // - Indirect Helper calls
19053 // - Direct calls to known addresses
19054 // - Direct calls where address is accessed by one or two indirections
19055 // - Indirect calls to computed addresses
19056 // - Tailcall versions of all of the above
19058 CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
19060 //------------------------------------------------------
19061 // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
19063 // For (final and private) functions which were called with
19064 // invokevirtual, but which we call directly, we need to
19065 // dereference the object pointer to make sure it's not NULL.
19068 if (call->gtFlags & GTF_CALL_NULLCHECK)
19070 /* Generate "cmp ECX, [ECX]" to trap null pointers */
19071 const regNumber regThis = genGetThisArgReg(call);
19072 #if CPU_LOAD_STORE_ARCH
19074 regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
19075 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
19076 regTracker.rsTrackRegTrash(indReg);
19078 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
19082 if (call->gtFlags & GTF_CALL_UNMANAGED)
19084 //------------------------------------------------------
19085 // Non-virtual/Indirect calls: PInvoke calls.
19087 noway_assert(compiler->info.compCallUnmanaged != 0);
19089 /* args shouldn't be greater than 64K */
19091 noway_assert((argSize & 0xffff0000) == 0);
19093 /* Remember the varDsc for the callsite-epilog */
19095 frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
19097 // exact codegen is required
19098 getEmitter()->emitDisableRandomNops();
19102 regNumber indCallReg = REG_NA;
19104 if (callType == CT_INDIRECT)
19106 noway_assert(genStillAddressable(call->gtCallAddr));
19108 if (call->gtCallAddr->InReg())
19109 indCallReg = call->gtCallAddr->gtRegNum;
19111 nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
19116 noway_assert(callType == CT_USER_FUNC);
19119 regNumber tcbReg = REG_NA;
19121 if (!compiler->opts.ShouldUsePInvokeHelpers())
19123 tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
19128 if (callType == CT_INDIRECT)
19130 /* Double check that the callee didn't use/trash the
19131 registers holding the call target.
19133 noway_assert(tcbReg != indCallReg);
19135 if (indCallReg == REG_NA)
19137 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19140 /* Please note that this even works with tcbReg == REG_EAX.
19141 tcbReg contains an interesting value only if frameListRoot is
19142 an enregistered local that stays alive across the call
19143 (certainly not EAX). If frameListRoot has been moved into
19144 EAX, we can trash it since it won't survive across the call
19148 inst_RV_TT(INS_mov, indCallReg, call->gtCallAddr);
19149 regTracker.rsTrackRegTrash(indCallReg);
19152 emitCallType = emitter::EC_INDIR_R;
19156 noway_assert(callType == CT_USER_FUNC);
19158 CORINFO_CONST_LOOKUP lookup;
19159 compiler->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
19161 addr = lookup.addr;
19163 assert(addr != NULL);
19165 #if defined(_TARGET_ARM_)
19166 // Legacy backend does not handle the `IAT_VALUE` case that does not
19167 // fit. It is not reachable currently from any front end so just check
19168 // for it via assert.
19169 assert(lookup.accessType != IAT_VALUE || arm_Valid_Imm_For_BL((ssize_t)addr));
19171 if (lookup.accessType == IAT_VALUE || lookup.accessType == IAT_PVALUE)
19173 #if CPU_LOAD_STORE_ARCH
19174 // Load the address into a register, indirect it and call through a register
19175 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19177 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19179 if (lookup.accessType == IAT_PVALUE)
19181 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19184 regTracker.rsTrackRegTrash(indCallReg);
19185 // Now make the call "call indCallReg"
19187 getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
19188 methHnd, // methHnd
19189 INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
19192 retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19193 gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
19195 emitCallType = emitter::EC_INDIR_R;
19198 emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19199 indCallReg = REG_NA;
19204 assert(lookup.accessType == IAT_PPVALUE);
19205 // Double-indirection. Load the address into a register
19206 // and call indirectly through a register
19207 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
19210 #if CPU_LOAD_STORE_ARCH
19211 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19212 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19213 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19214 regTracker.rsTrackRegTrash(indCallReg);
19216 emitCallType = emitter::EC_INDIR_R;
19219 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
19220 regTracker.rsTrackRegTrash(indCallReg);
19221 emitCallType = emitter::EC_INDIR_ARD;
19223 #endif // CPU_LOAD_STORE_ARCH
19225 // For a indirect calls, we don't want to pass the address (used below),
19226 // so set it to nullptr. (We've already used the address to load up the target register.)
19231 getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
19232 INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
19233 gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
19234 ilOffset, indCallReg);
19236 if (callType == CT_INDIRECT)
19237 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19239 getEmitter()->emitEnableRandomNops();
19241 // Done with PInvoke calls
19245 if (callType == CT_INDIRECT)
19247 noway_assert(genStillAddressable(call->gtCallAddr));
19249 if (call->gtCallCookie)
19251 //------------------------------------------------------
19252 // Non-virtual indirect calls via the P/Invoke stub
19254 GenTree* cookie = call->gtCallCookie;
19255 GenTree* target = call->gtCallAddr;
19257 noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
19259 noway_assert(cookie->gtOper == GT_CNS_INT ||
19260 cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
19262 noway_assert(args == argSize);
19264 #if defined(_TARGET_X86_)
19265 /* load eax with the real target */
19267 inst_RV_TT(INS_mov, REG_EAX, target);
19268 regTracker.rsTrackRegTrash(REG_EAX);
19270 if (cookie->gtOper == GT_CNS_INT)
19271 inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
19273 inst_TT(INS_push, cookie);
19275 /* Keep track of ESP for EBP-less frames */
19278 argSize += REGSIZE_BYTES;
19280 #elif defined(_TARGET_ARM_)
19282 // Ensure that we spill these registers (if caller saved) in the prolog
19283 regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
19285 // ARM: load r12 with the real target
19286 // X64: load r10 with the real target
19287 inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
19288 regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
19290 // ARM: load r4 with the pinvoke VASigCookie
19291 // X64: load r11 with the pinvoke VASigCookie
19292 if (cookie->gtOper == GT_CNS_INT)
19293 inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
19294 EA_HANDLE_CNS_RELOC);
19296 inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
19297 regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19299 noway_assert(args == argSize);
19301 // Ensure that we don't trash any of these registers if we have to load
19302 // the helper call target into a register to invoke it.
19303 regMaskTP regsUsed;
19304 regSet.rsLockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM | RBM_PINVOKE_COOKIE_PARAM,
19307 NYI("Non-virtual indirect calls via the P/Invoke stub");
19311 noway_assert((size_t)(int)args == args);
19313 genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
19315 #if defined(_TARGET_ARM_)
19316 regSet.rsUnlockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
19317 RBM_PINVOKE_COOKIE_PARAM,
19321 #ifdef _TARGET_ARM_
19322 // genEmitHelperCall doesn't record all registers a helper call would trash.
19323 regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
19328 //------------------------------------------------------
19329 // Non-virtual indirect calls
19333 inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
19334 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19337 instEmit_indCall(call, args, retSize);
19340 genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
19342 // Done with indirect calls
19346 //------------------------------------------------------
19347 // Non-virtual direct/indirect calls: Work out if the address of the
19348 // call is known at JIT time (if not it is either an indirect call
19349 // or the address must be accessed via an single/double indirection)
19351 noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
19354 InfoAccessType accessType;
19356 helperNum = compiler->eeGetHelperNum(methHnd);
19358 if (callType == CT_HELPER)
19360 noway_assert(helperNum != CORINFO_HELP_UNDEF);
19362 #ifdef FEATURE_READYTORUN_COMPILER
19363 if (call->gtEntryPoint.addr != NULL)
19365 accessType = call->gtEntryPoint.accessType;
19366 addr = call->gtEntryPoint.addr;
19369 #endif // FEATURE_READYTORUN_COMPILER
19373 accessType = IAT_VALUE;
19374 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
19378 accessType = IAT_PVALUE;
19385 noway_assert(helperNum == CORINFO_HELP_UNDEF);
19387 CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
19389 if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
19390 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
19392 if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
19393 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
19395 #ifdef FEATURE_READYTORUN_COMPILER
19396 if (call->gtEntryPoint.addr != NULL)
19398 accessType = call->gtEntryPoint.accessType;
19399 addr = call->gtEntryPoint.addr;
19402 #endif // FEATURE_READYTORUN_COMPILER
19404 CORINFO_CONST_LOOKUP addrInfo;
19405 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
19407 accessType = addrInfo.accessType;
19408 addr = addrInfo.addr;
19414 noway_assert(callType == CT_USER_FUNC);
19416 switch (accessType)
19419 //------------------------------------------------------
19420 // Non-virtual direct calls to known addressess
19422 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19426 //------------------------------------------------------
19427 // Non-virtual direct calls to addresses accessed by
19428 // a single indirection.
19430 // For tailcalls we place the target address in REG_TAILCALL_ADDR
19431 CLANG_FORMAT_COMMENT_ANCHOR;
19433 #if CPU_LOAD_STORE_ARCH
19435 regNumber indReg = REG_TAILCALL_ADDR;
19436 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19437 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19438 regTracker.rsTrackRegTrash(indReg);
19441 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19442 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19447 //------------------------------------------------------
19448 // Non-virtual direct calls to addresses accessed by
19449 // a double indirection.
19451 // For tailcalls we place the target address in REG_TAILCALL_ADDR
19452 CLANG_FORMAT_COMMENT_ANCHOR;
19454 #if CPU_LOAD_STORE_ARCH
19456 regNumber indReg = REG_TAILCALL_ADDR;
19457 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
19458 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19459 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
19460 regTracker.rsTrackRegTrash(indReg);
19463 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
19464 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
19465 REG_TAILCALL_ADDR, 0);
19466 regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
19471 noway_assert(!"Bad accessType");
19477 switch (accessType)
19479 regNumber indCallReg;
19483 //------------------------------------------------------
19484 // Non-virtual direct calls to known addressess
19486 // The vast majority of calls end up here.... Wouldn't
19487 // it be nice if they all did!
19488 CLANG_FORMAT_COMMENT_ANCHOR;
19489 #ifdef _TARGET_ARM_
19490 // We may use direct call for some of recursive calls
19491 // as we can safely estimate the distance from the call site to the top of the method
19492 const int codeOffset = MAX_PROLOG_SIZE_BYTES + // prolog size
19493 getEmitter()->emitCurCodeOffset + // offset of the current IG
19494 getEmitter()->emitCurIGsize + // size of the current IG
19495 4; // size of the jump instruction
19496 // that we are now emitting
19497 if (compiler->gtIsRecursiveCall(call) && codeOffset <= -CALL_DIST_MAX_NEG)
19499 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
19500 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19501 args, retSize, gcInfo.gcVarPtrSetCur,
19502 gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
19503 REG_NA, REG_NA, 0, 0, // ireg, xreg, xmul, disp
19505 emitter::emitNoGChelper(helperNum));
19507 else if (!arm_Valid_Imm_For_BL((ssize_t)addr))
19509 // Load the address into a register and call through a register
19510 indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
19511 // CALL indirection
19512 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19514 getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
19515 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19516 args, retSize, gcInfo.gcVarPtrSetCur,
19517 gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
19518 indCallReg, // ireg
19519 REG_NA, 0, 0, // xreg, xmul, disp
19521 emitter::emitNoGChelper(helperNum));
19526 getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
19527 INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
19528 gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19529 gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
19530 0, /* ireg, xreg, xmul, disp */
19531 false, /* isJump */
19532 emitter::emitNoGChelper(helperNum));
19539 //------------------------------------------------------
19540 // Non-virtual direct calls to addresses accessed by
19541 // a single indirection.
19544 // Load the address into a register, load indirect and call through a register
19545 CLANG_FORMAT_COMMENT_ANCHOR;
19546 #if CPU_LOAD_STORE_ARCH
19547 regMaskTP indCallMask = RBM_ALLINT;
19549 #ifdef FEATURE_READYTORUN_COMPILER
19550 if (call->IsR2RRelativeIndir())
19552 indCallMask &= ~RBM_R2R_INDIRECT_PARAM;
19554 #endif // FEATURE_READYTORUN_COMPILER
19556 // Grab an available register to use for the CALL indirection
19557 indCallReg = regSet.rsGrabReg(indCallMask);
19559 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19561 #ifdef FEATURE_READYTORUN_COMPILER
19562 if (call->IsR2RRelativeIndir())
19564 noway_assert(regSet.rsRegMaskCanGrab() & RBM_R2R_INDIRECT_PARAM);
19565 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R2R_INDIRECT_PARAM, indCallReg);
19566 regTracker.rsTrackRegTrash(REG_R2R_INDIRECT_PARAM);
19568 #endif // FEATURE_READYTORUN_COMPILER
19570 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19571 regTracker.rsTrackRegTrash(indCallReg);
19573 emitCallType = emitter::EC_INDIR_R;
19577 emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
19578 indCallReg = REG_NA;
19580 #endif // CPU_LOAD_STORE_ARCH
19582 getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
19583 retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19584 gcInfo.gcRegByrefSetCur, ilOffset,
19585 indCallReg, // ireg
19586 REG_NA, 0, 0, // xreg, xmul, disp
19587 false, /* isJump */
19588 emitter::emitNoGChelper(helperNum));
19594 //------------------------------------------------------
19595 // Non-virtual direct calls to addresses accessed by
19596 // a double indirection.
19598 // Double-indirection. Load the address into a register
19599 // and call indirectly through the register
19601 noway_assert(helperNum == CORINFO_HELP_UNDEF);
19603 // Grab an available register to use for the CALL indirection
19604 indCallReg = regSet.rsGrabReg(RBM_ALLINT);
19606 #if CPU_LOAD_STORE_ARCH
19607 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
19608 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19609 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
19610 regTracker.rsTrackRegTrash(indCallReg);
19612 emitCallType = emitter::EC_INDIR_R;
19616 getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
19617 regTracker.rsTrackRegTrash(indCallReg);
19619 emitCallType = emitter::EC_INDIR_ARD;
19621 #endif // CPU_LOAD_STORE_ARCH
19623 getEmitter()->emitIns_Call(emitCallType, methHnd,
19624 INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
19625 args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
19626 gcInfo.gcRegByrefSetCur, ilOffset,
19627 indCallReg, // ireg
19628 REG_NA, 0, 0, // xreg, xmul, disp
19630 emitter::emitNoGChelper(helperNum));
19635 noway_assert(!"Bad accessType");
19639 // tracking of region protected by the monitor in synchronized methods
19640 if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
19642 fPossibleSyncHelperCall = true;
19649 noway_assert(!"strange call type");
19653 /*-------------------------------------------------------------------------
19654 * For tailcalls, REG_INTRET contains the address of the target function,
19655 * enregistered args are in the correct registers, and the stack arguments
19656 * have been pushed on the stack. Now call the stub-sliding helper
19662 if (compiler->info.compCallUnmanaged)
19663 genPInvokeMethodEpilog();
19665 #ifdef _TARGET_X86_
19666 noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
19668 // Push the count of the incoming stack arguments
19670 unsigned nOldStkArgs =
19671 (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES);
19672 getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
19673 genSinglePush(); // Keep track of ESP for EBP-less frames
19674 args += REGSIZE_BYTES;
19676 // Push the count of the outgoing stack arguments
19678 getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / REGSIZE_BYTES);
19679 genSinglePush(); // Keep track of ESP for EBP-less frames
19680 args += REGSIZE_BYTES;
19682 // Push info about the callee-saved registers to be restored
19683 // For now, we always spill all registers if compiler->compTailCallUsed
19685 DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX
19686 (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
19687 getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
19688 genSinglePush(); // Keep track of ESP for EBP-less frames
19689 args += REGSIZE_BYTES;
19691 // Push the address of the target function
19693 getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
19694 genSinglePush(); // Keep track of ESP for EBP-less frames
19695 args += REGSIZE_BYTES;
19697 #else // _TARGET_X86_
19700 retSize = EA_UNKNOWN;
19702 #endif // _TARGET_X86_
19704 if (compiler->getNeedsGSSecurityCookie())
19706 genEmitGSCookieCheck(true);
19709 // TailCall helper does not poll for GC. An explicit GC poll
19710 // Should have been placed in when we morphed this into a tail call.
19711 noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
19713 // Now call the helper
19715 genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
19718 /*-------------------------------------------------------------------------
19720 * Trash registers, pop arguments if needed, etc
19723 /* Mark the argument registers as free */
19725 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
19727 for (areg = 0; areg < MAX_REG_ARG; areg++)
19729 regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
19731 // Is this one of the used argument registers?
19732 if ((curArgMask & call->gtCallRegUsedMask) == 0)
19735 #ifdef _TARGET_ARM_
19736 if (regSet.rsUsedTree[areg] == NULL)
19738 noway_assert(areg % 2 == 1 &&
19739 (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
19740 (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
19745 regSet.rsMarkRegFree(curArgMask);
19747 // We keep regSet.rsMaskVars current during codegen, so we have to remove any
19748 // that have been copied into arg regs.
19750 regSet.RemoveMaskVars(curArgMask);
19751 gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
19752 gcInfo.gcRegByrefSetCur &= ~(curArgMask);
19755 #if !FEATURE_STACK_FP_X87
19756 //-------------------------------------------------------------------------
19757 // free up the FP args
19759 for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
19761 regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
19762 regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
19764 // Is this one of the used argument registers?
19765 if ((curArgMask & call->gtCallRegUsedMask) == 0)
19768 regSet.rsMaskUsed &= ~curArgMask;
19769 regSet.rsUsedTree[argRegNum] = NULL;
19771 #endif // !FEATURE_STACK_FP_X87
19773 /* restore the old argument register status */
19775 intRegState.rsCurRegArgNum = savCurIntArgReg;
19776 floatRegState.rsCurRegArgNum = savCurFloatArgReg;
19778 noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
19780 /* Mark all trashed registers as such */
19782 if (calleeTrashedRegs)
19783 regTracker.rsTrashRegSet(calleeTrashedRegs);
19785 regTracker.rsTrashRegsForGCInterruptability();
19789 if (!(call->gtFlags & GTF_CALL_POP_ARGS))
19791 if (compiler->verbose)
19793 printf("\t\t\t\t\t\t\tEnd call ");
19794 Compiler::printTreeID(call);
19795 printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
19797 noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
19802 #if FEATURE_STACK_FP_X87
19803 /* All float temps must be spilled around function calls */
19804 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19806 noway_assert(compCurFPState.m_uStackSize == 1);
19810 noway_assert(compCurFPState.m_uStackSize == 0);
19813 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
19815 #ifdef _TARGET_ARM_
19816 if (call->IsVarargs() || compiler->opts.compUseSoftFP)
19818 // Result return for vararg methods is in r0, r1, but our callers would
19819 // expect the return in s0, s1 because of floating type. Do the move now.
19820 if (call->gtType == TYP_FLOAT)
19822 inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
19826 inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
19830 genMarkTreeInReg(call, REG_FLOATRET);
19834 /* The function will pop all arguments before returning */
19836 SetStackLevel(saveStackLvl);
19838 /* No trashed registers may possibly hold a pointer at this point */
19839 CLANG_FORMAT_COMMENT_ANCHOR;
19843 regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
19844 ~regSet.rsMaskVars & ~vptrMask;
19847 // A reg may be dead already. The assertion is too strong.
19851 // use compiler->compCurLife
19852 for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
19854 /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
19856 if (!varDsc->lvTracked)
19858 if (!varDsc->lvRegister)
19860 if (varDsc->IsFloatRegType())
19863 /* Get hold of the index and the bitmask for the variable */
19865 unsigned varIndex = varDsc->lvVarIndex;
19867 /* Is this variable live currently? */
19869 if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
19871 regNumber regNum = varDsc->lvRegNum;
19872 regMaskTP regMask = genRegMask(regNum);
19874 if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
19875 ptrRegs &= ~regMask;
19880 printf("Bad call handling for ");
19881 Compiler::printTreeID(call);
19883 noway_assert(!"A callee trashed reg is holding a GC pointer");
19888 #if defined(_TARGET_X86_)
19889 //-------------------------------------------------------------------------
19890 // Create a label for tracking of region protected by the monitor in synchronized methods.
19891 // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
19892 // so the GC state vars have been updated before creating the label.
19894 if (fPossibleSyncHelperCall)
19898 case CORINFO_HELP_MON_ENTER:
19899 case CORINFO_HELP_MON_ENTER_STATIC:
19900 noway_assert(compiler->syncStartEmitCookie == NULL);
19901 compiler->syncStartEmitCookie =
19902 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
19903 noway_assert(compiler->syncStartEmitCookie != NULL);
19905 case CORINFO_HELP_MON_EXIT:
19906 case CORINFO_HELP_MON_EXIT_STATIC:
19907 noway_assert(compiler->syncEndEmitCookie == NULL);
19908 compiler->syncEndEmitCookie =
19909 getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
19910 noway_assert(compiler->syncEndEmitCookie != NULL);
19916 #endif // _TARGET_X86_
19918 if (call->gtFlags & GTF_CALL_UNMANAGED)
19920 genDefineTempLabel(returnLabel);
19922 #ifdef _TARGET_X86_
19923 if (getInlinePInvokeCheckEnabled())
19925 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
19926 BasicBlock* esp_check;
19928 CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
19929 /* mov ecx, dword ptr [frame.callSiteTracker] */
19931 getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
19932 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19933 regTracker.rsTrackRegTrash(REG_ARG_0);
19935 /* Generate the conditional jump */
19937 if (!(call->gtFlags & GTF_CALL_POP_ARGS))
19941 getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
19946 getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
19948 esp_check = genCreateTempLabel();
19950 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
19951 inst_JMP(jmpEqual, esp_check);
19953 getEmitter()->emitIns(INS_BREAKPOINT);
19955 /* genCondJump() closes the current emitter block */
19957 genDefineTempLabel(esp_check);
19962 /* Are we supposed to pop the arguments? */
19963 CLANG_FORMAT_COMMENT_ANCHOR;
19965 #if defined(_TARGET_X86_)
19966 if (call->gtFlags & GTF_CALL_UNMANAGED)
19968 if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PINVOKE_RESTORE_ESP) ||
19969 compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
19971 // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
19972 // take care of the cdecl argument popping here as well but the stack depth tracking logic
19973 // makes this very hard, i.e. it needs to "see" the actual pop.
19975 CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
19977 if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
19979 /* mov esp, dword ptr [frame.callSiteTracker] */
19980 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
19981 compiler->lvaInlinedPInvokeFrameVar,
19982 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19986 /* mov ecx, dword ptr [frame.callSiteTracker] */
19987 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
19988 compiler->lvaInlinedPInvokeFrameVar,
19989 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
19990 regTracker.rsTrackRegTrash(REG_ARG_0);
19992 /* lea esp, [ecx + argSize] */
19993 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
19997 #endif // _TARGET_X86_
19999 if (call->gtFlags & GTF_CALL_POP_ARGS)
20001 noway_assert(args == (size_t) - (int)argSize);
20005 genAdjustSP(argSize);
20009 if (pseudoStackLvl)
20011 noway_assert(call->gtType == TYP_VOID);
20018 /* What does the function return? */
20022 switch (call->gtType)
20026 gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
20031 #if !CPU_HAS_FP_SUPPORT
20034 retVal = RBM_INTRET;
20037 #ifdef _TARGET_ARM_
20040 assert(call->gtRetClsHnd != NULL);
20041 assert(compiler->IsHfa(call->gtRetClsHnd));
20042 int retSlots = compiler->GetHfaCount(call->gtRetClsHnd);
20043 assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
20044 assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
20045 retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
20051 #if !CPU_HAS_FP_SUPPORT
20054 retVal = RBM_LNGRET;
20057 #if CPU_HAS_FP_SUPPORT
20068 noway_assert(!"unexpected/unhandled fn return type");
20071 // We now have to generate the "call epilog" (if it was a call to unmanaged code).
20072 /* if it is a call to unmanaged code, frameListRoot must be set */
20074 noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
20077 genPInvokeCallEpilog(frameListRoot, retVal);
20079 if (frameListRoot && (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
20081 if (frameListRoot->lvRegister)
20083 bool isBorn = false;
20084 bool isDying = true;
20085 genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
20090 if (compiler->opts.compStackCheckOnCall
20091 #if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
20092 // check the stack as frequently as possible
20093 && !call->IsHelperCall()
20095 && call->gtCallType == CT_USER_FUNC
20099 noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
20100 compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
20101 compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
20104 getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
20105 getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
20106 getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
20107 regTracker.rsTrackRegTrash(REG_ARG_0);
20110 getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
20112 BasicBlock* esp_check = genCreateTempLabel();
20113 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20114 inst_JMP(jmpEqual, esp_check);
20115 getEmitter()->emitIns(INS_BREAKPOINT);
20116 genDefineTempLabel(esp_check);
20120 #if FEATURE_STACK_FP_X87
20121 UnspillRegVarsStackFp();
20122 #endif // FEATURE_STACK_FP_X87
20124 if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
20126 // Restore return node if necessary
20127 if (call->gtFlags & GTF_SPILLED)
20129 UnspillFloat(call);
20132 #if FEATURE_STACK_FP_X87
20134 regSet.SetUsedRegFloat(call, false);
20138 #if FEATURE_STACK_FP_X87
20140 if (compiler->verbose)
20150 #pragma warning(pop)
20153 /*****************************************************************************
20155 * Create and record GC Info for the function.
20157 #ifdef JIT32_GCENCODER
// Creates and records GC info for the method by dispatching to the
// target-specific encoder: the JIT32 encoder on x86 (JIT32_GCENCODER),
// or the GcInfoEncoder-based path on other targets.
// NOTE(review): surrounding lines are elided in this view; comments only added.
20162 CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
20164 #ifdef JIT32_GCENCODER
// The JIT32 encoder returns a pointer to the info block it allocated.
20165 return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
// The encoder path allocates/stores the info itself; nothing to return here.
20167 genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
20171 #ifdef JIT32_GCENCODER
// Creates and records the JIT32-format GC info for the method:
// 1) measures the encoded header and pointer-table sizes,
// 2) allocates the info block from the EE via allocGCInfo,
// 3) emits the header followed by the GC tracking tables into that block.
// Returns a pointer to the allocated info block.
// NOTE(review): intervening lines are elided in this view; comments only added.
20172 void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
20173 unsigned prologSize,
20174 unsigned epilogSize DEBUGARG(void* codePtr))
20176 BYTE headerBuf[64];
// Sizing pass: encode the header into a scratch buffer to learn its size.
20181 size_t headerSize =
20183 compiler->compInfoBlkSize =
20184 gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
// Measure the pointer table that follows the header.
20186 size_t argTabOffset = 0;
20187 size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
// Accumulate size statistics (DISPLAY_SIZES builds only).
20191 if (genInterruptible)
20193 gcHeaderISize += compiler->compInfoBlkSize;
20194 gcPtrMapISize += ptrMapSize;
20198 gcHeaderNSize += compiler->compInfoBlkSize;
20199 gcPtrMapNSize += ptrMapSize;
20202 #endif // DISPLAY_SIZES
// Total block = header + pointer table.
20204 compiler->compInfoBlkSize += ptrMapSize;
20206 /* Allocate the info block for the method */
20208 compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
20210 #if 0 // VERBOSE_SIZES
20211 // TODO-Review: 'dataSize', below, is not defined
20213 // if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
20215 printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
20216 compiler->info.compILCodeSize,
20217 compiler->compInfoBlkSize,
20218 codeSize + dataSize,
20219 codeSize + dataSize - prologSize - epilogSize,
20220 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
20221 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
20222 compiler->info.compClassName,
20223 compiler->info.compMethodName);
20228 /* Fill in the info block and return it to the caller */
20230 void* infoPtr = compiler->compInfoBlkAddr;
20232 /* Create the method info block: header followed by GC tracking tables */
// Emit pass: write the header into the allocated block for real this time.
20234 compiler->compInfoBlkAddr +=
20235 gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
// The emitted sizes must match what the sizing pass predicted.
20237 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
20238 compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
20239 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
// Debug dump of the raw info block bytes (header, then pointer table).
20245 BYTE* temp = (BYTE*)infoPtr;
20246 unsigned size = compiler->compInfoBlkAddr - temp;
20247 BYTE* ptab = temp + headerSize;
20249 noway_assert(size == headerSize + ptrMapSize);
20251 printf("Method info block - header [%u bytes]:", headerSize);
20253 for (unsigned i = 0; i < size; i++)
20257 printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
20258 printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
20263 printf("\n %04X: ", i);
20266 printf("%02X ", *temp++);
// Optional decoded dump of the GC tables (-dspGCtbls).
20276 if (compiler->opts.dspGCtbls)
20278 const BYTE* base = (BYTE*)infoPtr;
20280 unsigned methodSize;
20281 InfoHdr dumpHeader;
20283 printf("GC Info for method %s\n", compiler->info.compFullName);
20284 printf("GC info size = %3u\n", compiler->compInfoBlkSize);
20286 size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
20287 // printf("size of header encoding is %3u\n", size);
20290 if (compiler->opts.dspGCtbls)
20293 size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
20294 // printf("size of pointer table is %3u\n", size);
20296 noway_assert(compiler->compInfoBlkAddr == (base + size));
// Stress mode: decode pointers at every code offset to validate the tables.
20301 if (jitOpts.testMask & 128)
20303 for (unsigned offs = 0; offs < codeSize; offs++)
20305 gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
20309 #endif // DUMP_GC_TABLES
20311 /* Make sure we ended up generating the expected number of bytes */
20313 noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
20318 #else // JIT32_GCENCODER
// Creates and records GC info via the platform-independent GcInfoEncoder
// (all targets other than x86/JIT32). The encoder allocates the storage
// itself through ICorJitInfo::allocGCInfo when Emit() is called.
// NOTE(review): intervening lines are elided in this view; comments only added.
20320 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
// NOMEM: the allocator calls the no-memory handler rather than returning null.
20322 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
20323 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
20324 GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
20325 assert(gcInfoEncoder);
20327 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
20328 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
20330 // We keep the call count for the second call to gcMakeRegPtrTable() below.
20331 unsigned callCnt = 0;
20332 // First we figure out the encoder ID's for the stack slots and registers.
20333 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
20334 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
20335 gcInfoEncoder->FinalizeSlotIds();
20336 // Now we can actually use those slot ID's to declare live ranges.
20337 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
20339 gcInfoEncoder->Build();
20341 // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
20342 // let's save the values anyway for debugging purposes
20343 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
20344 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
20348 /*****************************************************************************
// Generates code for a localloc (GT_LCLHEAP): allocates 'size' bytes on the
// stack, zero-initializing and/or page-probing as required, and returns the
// register that holds the address of the allocated block.
// Handles both constant and variable sizes; with FEATURE_FIXED_OUT_ARGS the
// outgoing-arg area is temporarily popped and restored around the alloc.
// NOTE(review): intervening lines are elided in this view; comments only added.
20352 regNumber CodeGen::genLclHeap(GenTree* size)
20354 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
20356 // regCnt is a register used to hold both
20357 // the amount to stack alloc (either in bytes or pointer sized words)
20358 // and the final stack alloc address to return as the result
20360 regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
20361 var_types type = genActualType(size->gtType);
20362 emitAttr easz = emitTypeSize(type);
// Stack-check debug mode: verify ESP still matches the saved check value.
20366 if (compiler->opts.compStackCheckOnRet)
20368 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
20369 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
20370 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
20371 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
20373 BasicBlock* esp_check = genCreateTempLabel();
20374 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20375 inst_JMP(jmpEqual, esp_check);
20376 getEmitter()->emitIns(INS_BREAKPOINT);
20377 genDefineTempLabel(esp_check);
// localloc requires a frame pointer and an empty codegen stack.
20381 noway_assert(isFramePointerUsed());
20382 noway_assert(genStackLevel == 0); // Can't have anything on the stack
20384 BasicBlock* endLabel = NULL;
20385 #if FEATURE_FIXED_OUT_ARGS
20386 bool stackAdjusted = false;
// ---- Constant-sized localloc ----
20389 if (size->IsCnsIntOrI())
20391 #if FEATURE_FIXED_OUT_ARGS
20392 // If we have an outgoing arg area then we must adjust the SP
20393 // essentially popping off the outgoing arg area,
20394 // We will restore it right before we return from this method
20396 if (compiler->lvaOutgoingArgSpaceSize > 0)
20398 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20399 0); // This must be true for the stack to remain aligned
20400 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20401 stackAdjusted = true;
20404 size_t amount = size->gtIntCon.gtIconVal;
20406 // Convert amount to be properly STACK_ALIGN and count of DWORD_PTRs
20407 amount += (STACK_ALIGN - 1);
20408 amount &= ~(STACK_ALIGN - 1);
20409 amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc
20410 size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
20412 /* If amount is zero then return null in RegCnt */
20415 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20416 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
20420 /* For small allocations we will generate up to six push 0 inline */
20423 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20424 #if CPU_LOAD_STORE_ARCH
20425 regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20426 // Set 'regZero' to zero
20427 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
20430 while (amount != 0)
20432 #if CPU_LOAD_STORE_ARCH
20433 inst_IV(INS_push, (unsigned)genRegMask(regZero));
20435 inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
20440 regTracker.rsTrackRegTrash(regCnt);
20441 // --- move regCnt, ESP
20442 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
// No zero-init required: a sub-page constant size can just adjust ESP,
// touching the existing top-of-stack first in case ESP is in the guard page.
20447 if (!compiler->info.compInitMem)
20449 // Re-bias amount to be number of bytes to adjust the SP
20450 amount <<= STACK_ALIGN_SHIFT;
20451 size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
20452 if (amount < compiler->eeGetPageSize()) // must be < not <=
20454 // Since the size is a page or less, simply adjust ESP
20456 // ESP might already be in the guard page, must touch it BEFORE
20457 // the alloc, not after.
20458 regCnt = regSet.rsGrabReg(RBM_ALLINT);
20459 inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
20460 #if CPU_LOAD_STORE_ARCH
20461 regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20462 getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
20463 regTracker.rsTrackRegTrash(regTmp);
20465 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
20467 inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
20468 inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
20469 regTracker.rsTrackRegTrash(regCnt);
// ---- Variable-sized localloc ----
20476 // Compute the size of the block to allocate
20477 genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
20478 noway_assert(size->InReg());
20479 regCnt = size->gtRegNum;
20481 #if FEATURE_FIXED_OUT_ARGS
20482 // If we have an outgoing arg area then we must adjust the SP
20483 // essentially popping off the outgoing arg area,
20484 // We will restore it right before we return from this method
20486 if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
20488 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20489 0); // This must be true for the stack to remain aligned
20490 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20491 stackAdjusted = true;
20495 // Perform alignment if we don't have a GT_CNS size
20497 if (!size->IsCnsIntOrI())
20499 endLabel = genCreateTempLabel();
20501 // If 0 we bail out
20502 instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
20503 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
20504 inst_JMP(jmpEqual, endLabel);
20506 // Align to STACK_ALIGN
20507 inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
20509 if (compiler->info.compInitMem)
20511 #if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
20512 // regCnt will be the number of pointer-sized words to locAlloc
20513 // If the shift right won't do the 'and' do it here
20514 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20516 // --- shr regCnt, 2 ---
20517 inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
20521 // regCnt will be the total number of bytes to locAlloc
20523 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
20528 loop = genCreateTempLabel();
// Zero-init path: push zeros in a loop, which both clears the memory and
// touches each page so the guard page is honored.
20530 if (compiler->info.compInitMem)
20532 // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
20534 /* Since we have to zero out the allocated memory AND ensure that
20535 ESP is always valid by tickling the pages, we will just push 0's
20537 CLANG_FORMAT_COMMENT_ANCHOR;
20539 #if defined(_TARGET_ARM_)
20540 regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
20541 regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
20542 // Set 'regZero1' and 'regZero2' to zero
20543 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
20544 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
20548 genDefineTempLabel(loop);
20550 #if defined(_TARGET_X86_)
20552 inst_IV(INS_push_hide, 0); // --- push 0
20554 inst_RV(INS_dec, regCnt, type);
20556 #elif defined(_TARGET_ARM_)
20558 inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
20560 inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
20563 assert(!"Codegen missing");
20566 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
20567 inst_JMP(jmpNotEqual, loop);
20569 // Move the final value of ESP into regCnt
20570 inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
20571 regTracker.rsTrackRegTrash(regCnt);
// Non-init path: probe (tickle) each page down to the final ESP value.
20575 // At this point 'regCnt' is set to the total number of bytes to locAlloc
20577 /* We don't need to zero out the allocated memory. However, we do have
20578 to tickle the pages to ensure that ESP is always valid and is
20579 in sync with the "stack guard page". Note that in the worst
20580 case ESP is on the last byte of the guard page. Thus you must
20581 touch ESP+0 first not ESP+0x1000.
20583 Another subtlety is that you don't want ESP to be exactly on the
20584 boundary of the guard page because PUSH is predecrement, thus
20585 call setup would not touch the guard page but just beyond it */
20587 /* Note that we go through a few hoops so that ESP never points to
20588 illegal pages at any time during the ticking process
20591 add REG, ESP // reg now holds ultimate ESP
20592 jb loop // result is smaller than original ESP (no wrap around)
20593 xor REG, REG, // Overflow, pick lowest possible number
20595 test ESP, [ESP+0] // X86 - tickle the page
20596 ldr REGH,[ESP+0] // ARM - tickle the page
20598 sub REGH, GetOsPageSize()
20606 CLANG_FORMAT_COMMENT_ANCHOR;
20608 #ifdef _TARGET_ARM_
20610 inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
20611 inst_JMP(EJ_hs, loop);
20613 inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
20614 inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
20615 inst_JMP(EJ_jb, loop);
20617 regTracker.rsTrashRegTrash(regCnt);
20619 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
20621 genDefineTempLabel(loop);
20623 // This is a workaround to avoid the emitter trying to track the
20624 // decrement of the ESP - we do the subtraction in another reg
20625 // instead of adjusting ESP directly.
20627 regNumber regTemp = regSet.rsPickReg();
20629 // Tickle the decremented value, and move back to ESP,
20630 // note that it has to be done BEFORE the update of ESP since
20631 // ESP might already be on the guard page. It is OK to leave
20632 // the final value of ESP on the guard page
20633 CLANG_FORMAT_COMMENT_ANCHOR;
20635 #if CPU_LOAD_STORE_ARCH
20636 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
20638 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
20641 inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
20642 regTracker.rsTrackRegTrash(regTemp);
20644 inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
20645 inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
20647 genRecoverReg(size, RBM_ALLINT,
20648 RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
20649 noway_assert(size->InReg());
20650 regCnt = size->gtRegNum;
20651 inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
20652 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
20653 inst_JMP(jmpGEU, loop);
20655 // Move the final value to ESP
20656 inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
20658 regSet.rsMarkRegFree(genRegMask(regCnt));
// ---- Common epilog: restore arg area, record SP, run debug checks ----
20662 noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
20664 if (endLabel != NULL)
20665 genDefineTempLabel(endLabel);
20667 #if FEATURE_FIXED_OUT_ARGS
20668 // If we have an outgoing arg area then we must readjust the SP
20672 assert(compiler->lvaOutgoingArgSpaceSize > 0);
20673 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
20674 0); // This must be true for the stack to remain aligned
20675 inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
20679 /* Write the lvaLocAllocSPvar stack frame slot */
20680 if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
20682 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
20686 // Don't think it is worth it the codegen complexity to embed this
20687 // when it's possible in each of the customized allocas.
20688 if (compiler->opts.compNeedStackProbes)
20690 genGenerateStackProbe();
// Record the new ESP for the stack-check-on-return debug mode.
20696 if (compiler->opts.compStackCheckOnRet)
20698 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
20699 compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
20700 compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
20701 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
20708 /*****************************************************************************
20710 * Return non-zero if the given register is free after the given tree is
20711 * evaluated (i.e. the register is either not used at all, or it holds a
20712 * register variable which is not live after the given node).
20713 * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
20714 * constant operand, and one that's in a register. Thus, the only thing we
20715 * need to determine is whether the register holding op1 is dead.
// Returns true if 'reg' is free after 'tree' (a GT_ADD with one constant
// operand) is evaluated: i.e. the register is either unused, or holds a
// register variable that dies at this node. Called only by genCreateAddrMode.
// NOTE(review): intervening lines (return statements, braces) are elided in
// this view; comments only added.
20717 bool CodeGen::genRegTrashable(regNumber reg, GenTree* tree)
20720 regMaskTP mask = genRegMask(reg);
// A register currently marked in-use can never be trashed.
20722 if (regSet.rsMaskUsed & mask)
20725 assert(tree->gtOper == GT_ADD);
// Identify which operand is the one living in the register (the other
// operand is the integer constant).
20726 GenTree* regValTree = tree->gtOp.gtOp1;
20727 if (!tree->gtOp.gtOp2->IsCnsIntOrI())
20729 regValTree = tree->gtOp.gtOp2;
20730 assert(tree->gtOp.gtOp1->IsCnsIntOrI());
20732 assert(regValTree->InReg());
20734 /* At this point, the only way that the register will remain live
20735 * is if it is itself a register variable that isn't dying.
20737 assert(regValTree->gtRegNum == reg);
20738 if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
20744 /*****************************************************************************/
20746 // This method calculates the USE and DEF values for a statement.
20747 // It also calls fgSetRngChkTarget for the statement.
20749 // We refactor out this code from fgPerBlockLocalVarLiveness
20750 // and add QMARK logics to it.
20752 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
20754 // The usage of this method is very limited.
20755 // We should only call it for the first node in the statement or
20756 // for the node after the GTF_RELOP_QMARK node.
20758 // NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
20761 Since a GT_QMARK tree can take two paths (i.e. the thenTree Path or the elseTree path),
20762 when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
20765 Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
20768 The algorithm we use is:
20769 (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
20770 (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
20771 We cache copies of current fgCurDefSet and fgCurUseSet.
20772 (The fact that it is recursively calling itself is for nested QMARK case,
20773 where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
20774 (3) We walk the thenTree.
20775 (4) When we see GT_COLON node, we know that we just finished the thenTree.
20776 We then make a copy of the current fgCurDefSet and fgCurUseSet,
20777 restore them to the ones before the thenTree, and then continue walking
20779 (5) When we see the GT_QMARK node, we know we just finished the elseTree.
20780 So we combine the results from the thenTree and elseTree and then return.
20783 +--------------------+
20785 +----------+---------+
20791 +---------------------+ +--------------------+
20792 | GT_<cond> 3 | | GT_COLON 7 |
20793 | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
20794 +----------+----------+ +---------+----------+
20800 2 1 thenTree 6 elseTree 10
20803 +----------------+ / / \ / \
20804 |prevExpr->gtNext+------/ / \ / \
20805 +----------------+ / \ / \
20811 GenTree* Compiler::fgLegacyPerStatementLocalVarLiveness(GenTree* startNode, // The node to start walking with.
20812 GenTree* relopNode) // The node before the startNode.
20813 // (It should either be NULL or
20814 // a GTF_RELOP_QMARK node.)
20818 VARSET_TP defSet_BeforeSplit(VarSetOps::MakeCopy(this, fgCurDefSet)); // Store the current fgCurDefSet and
20820 VARSET_TP useSet_BeforeSplit(VarSetOps::MakeCopy(this, fgCurUseSet)); // we can restore then before entering the
20823 MemoryKindSet memoryUse_BeforeSplit = fgCurMemoryUse;
20824 MemoryKindSet memoryDef_BeforeSplit = fgCurMemoryDef;
20825 MemoryKindSet memoryHavoc_BeforeSplit = fgCurMemoryHavoc;
20827 VARSET_TP defSet_AfterThenTree(VarSetOps::MakeEmpty(this)); // These two variables will store
20828 // the USE and DEF sets after
20829 VARSET_TP useSet_AfterThenTree(VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
20831 MemoryKindSet memoryUse_AfterThenTree = fgCurMemoryUse;
20832 MemoryKindSet memoryDef_AfterThenTree = fgCurMemoryDef;
20833 MemoryKindSet memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
20835 // relopNode is either NULL or a GTF_RELOP_QMARK node.
20836 assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
20838 // If relopNode is NULL, then the startNode must be the 1st node of the statement.
20839 // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
20840 assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
20841 (relopNode && startNode == relopNode->gtNext));
20843 for (tree = startNode; tree; tree = tree->gtNext)
20845 switch (tree->gtOper)
20850 // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
20851 noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
20853 // By the time we see a GT_QMARK, we must have finished processing the elseTree.
20854 // So it's the time to combine the results
20855 // from the the thenTree and the elseTree, and then return.
20857 VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
20858 VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
20860 fgCurMemoryDef = fgCurMemoryDef & memoryDef_AfterThenTree;
20861 fgCurMemoryHavoc = fgCurMemoryHavoc & memoryHavoc_AfterThenTree;
20862 fgCurMemoryUse = fgCurMemoryUse | memoryUse_AfterThenTree;
20864 // Return the GT_QMARK node itself so the caller can continue from there.
20865 // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
20866 // in the "for" statement.
20870 // By the time we see GT_COLON, we must have just walked the thenTree.
20871 // So we need to do two things here.
20872 // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
20873 // with the result from the elseTree.
20874 // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree is walked.
20875 // and then continue walking the elseTree.
20876 VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
20877 VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
20879 memoryDef_AfterThenTree = fgCurMemoryDef;
20880 memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
20881 memoryUse_AfterThenTree = fgCurMemoryUse;
20883 VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
20884 VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
20886 fgCurMemoryDef = memoryDef_BeforeSplit;
20887 fgCurMemoryHavoc = memoryHavoc_BeforeSplit;
20888 fgCurMemoryUse = memoryUse_BeforeSplit;
20894 case GT_LCL_VAR_ADDR:
20895 case GT_LCL_FLD_ADDR:
20896 case GT_STORE_LCL_VAR:
20897 case GT_STORE_LCL_FLD:
20898 fgMarkUseDef(tree->AsLclVarCommon());
20902 // For Volatile indirection, first mutate GcHeap/ByrefExposed
20903 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
20904 // This models Volatile reads as def-then-use of the heap.
20905 // and allows for a CSE of a subsequent non-volatile read
20906 if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
20908 // For any Volatile indirection, we must handle it as a
20909 // definition of GcHeap/ByrefExposed
20910 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20912 // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
20914 // Otherwise, we treat it as a use here.
20915 if ((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
20917 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20922 // For Volatile indirection, first mutate GcHeap/ByrefExposed
20923 // see comments in ValueNum.cpp (under case GT_CLS_VAR)
20924 // This models Volatile reads as def-then-use of the heap.
20925 // and allows for a CSE of a subsequent non-volatile read
20926 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
20928 // For any Volatile indirection, we must handle it as a
20929 // definition of GcHeap/ByrefExposed
20930 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20933 // If the GT_IND is the lhs of an assignment, we'll handle it
20934 // as a heap/byref def, when we get to assignment.
20935 // Otherwise, we treat it as a use here.
20936 if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
20938 GenTreeLclVarCommon* dummyLclVarTree = NULL;
20939 bool dummyIsEntire = false;
20940 GenTree* addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
20941 if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
20943 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20947 // Defines a local addr
20948 assert(dummyLclVarTree != nullptr);
20949 fgMarkUseDef(dummyLclVarTree->AsLclVarCommon());
20954 // These should have been morphed away to become GT_INDs:
20960 // We'll assume these are use-then-defs of GcHeap/ByrefExposed.
20965 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20966 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20967 fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
20970 case GT_MEMORYBARRIER:
20971 // Simliar to any Volatile indirection, we must handle this as a definition of GcHeap/ByrefExposed
20972 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20975 // For now, all calls read/write GcHeap/ByrefExposed, writes in their entirety. Might tighten this case
20979 GenTreeCall* call = tree->AsCall();
20980 bool modHeap = true;
20981 if (call->gtCallType == CT_HELPER)
20983 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
20985 if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
20992 fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
20993 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
20994 fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
20998 // If this is a p/invoke unmanaged call or if this is a tail-call
20999 // and we have an unmanaged p/invoke call in the method,
21000 // then we're going to run the p/invoke epilog.
21001 // So we mark the FrameRoot as used by this instruction.
21002 // This ensures that the block->bbVarUse will contain
21003 // the FrameRoot local var if is it a tracked variable.
21005 if (!opts.ShouldUsePInvokeHelpers())
21007 if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
21009 /* Get the TCB local and mark it as used */
21011 noway_assert(info.compLvFrameListRoot < lvaCount);
21013 LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
21015 if (varDsc->lvTracked)
21017 if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
21019 VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
21029 // Determine what memory kinds it defines.
21030 if (tree->OperIsAssignment() || tree->OperIsBlkOp())
21032 GenTreeLclVarCommon* dummyLclVarTree = NULL;
21033 if (tree->DefinesLocal(this, &dummyLclVarTree))
21035 if (lvaVarAddrExposed(dummyLclVarTree->gtLclNum))
21037 fgCurMemoryDef |= memoryKindSet(ByrefExposed);
21039 // We've found a store that modifies ByrefExposed
21040 // memory but not GcHeap memory, so track their
21041 // states separately.
21042 byrefStatesMatchGcHeapStates = false;
21047 // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
21048 fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
21052 // Are we seeing a GT_<cond> for a GT_QMARK node?
21053 if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
21055 // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
21056 // Recursively call fgLegacyPerStatementLocalVarLiveness.
21057 // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the
21059 // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
21060 // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
21062 tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree);
21064 // We must have been returned here after seeing a GT_QMARK node.
21065 noway_assert(tree->gtOper == GT_QMARK);
21076 /*****************************************************************************/
21078 /*****************************************************************************
21079 * Initialize the TCB local and the NDirect stub, afterwards "push"
21080 * the hoisted NDirect stub.
21082 * 'initRegs' is the set of registers which will be zeroed out by the prolog
21083 * typically initRegs is zero
21085 * The layout of the NDirect Inlined Call Frame is as follows:
21086 * (see VM/frames.h and VM/JITInterface.cpp for more information)
21088 * offset field name when set
21089 * --------------------------------------------------------------
21090 * +00h vptr for class InlinedCallFrame method prolog
21091 * +04h m_Next method prolog
21092 * +08h m_Datum call site
21093 * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
21094 * +10h m_pCallerReturnAddress call site
21095 * +14h m_pCalleeSavedRegisters not set by JIT
21096 * +18h JIT retval spill area (int) before call_gc
21097 * +1ch JIT retval spill area (long) before call_gc
21098 * +20h Saved value of EBP method prolog
21101 regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
21103 assert(compiler->compGeneratingProlog);
21104 noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
21105 noway_assert(compiler->info.compCallUnmanaged);
21107 CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
21108 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21110 /* let's find out if compLvFrameListRoot is enregistered */
21112 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
21114 noway_assert(!varDsc->lvIsParam);
21115 noway_assert(varDsc->lvType == TYP_I_IMPL);
21117 DWORD threadTlsIndex, *pThreadTlsIndex;
21119 threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
21120 #if defined(_TARGET_X86_)
21121 if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
21126 // Instead of calling GetThread(), and getting GS cookie and
21127 // InlinedCallFrame vptr through indirections, we'll call only one helper.
21128 // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
21129 // and uses REG_PINVOKE_SCRATCH as scratch register.
21130 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
21131 pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
21132 regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
21134 // We're about to trask REG_PINVOKE_TCB, it better not be in use!
21135 assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
21137 // Don't use the argument registers (including the special argument in
21138 // REG_PINVOKE_FRAME) for computing the target address.
21139 regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
21141 genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
21143 regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
21145 if (varDsc->lvRegister)
21147 regNumber regTgt = varDsc->lvRegNum;
21149 // we are about to initialize it. So turn the bit off in initRegs to prevent
21150 // the prolog reinitializing it.
21151 initRegs &= ~genRegMask(regTgt);
21153 if (regTgt != REG_PINVOKE_TCB)
21155 // move TCB to the its register if necessary
21156 getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
21157 regTracker.rsTrackRegTrash(regTgt);
21162 // move TCB to its stack location
21163 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
21164 compiler->info.compLvFrameListRoot, 0);
21167 // We are done, the rest of this function deals with the inlined case.
21173 if (varDsc->lvRegister)
21175 regTCB = varDsc->lvRegNum;
21177 // we are about to initialize it. So turn the bit off in initRegs to prevent
21178 // the prolog reinitializing it.
21179 initRegs &= ~genRegMask(regTCB);
21181 else // varDsc is allocated on the Stack
21183 regTCB = REG_PINVOKE_TCB;
21186 #if !defined(_TARGET_ARM_)
21187 #define WIN_NT_TLS_OFFSET (0xE10)
21188 #define WIN_NT5_TLS_HIGHOFFSET (0xf94)
21190 /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
21192 // TODO-ARM-CQ: should we inline TlsGetValue here?
21194 if (threadTlsIndex < 64)
21196 // mov reg, FS:[0xE10+threadTlsIndex*4]
21197 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
21198 WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
21199 regTracker.rsTrackRegTrash(regTCB);
21203 DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
21204 threadTlsIndex -= 64;
21206 // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
21207 // mov reg, [reg+threadTlsIndex*4]
21209 getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
21210 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
21211 regTracker.rsTrackRegTrash(regTCB);
21215 /* save TCB in local var if not enregistered */
21217 if (!varDsc->lvRegister)
21219 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
21222 /* set frame's vptr */
21224 const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
21225 inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
21226 noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
21228 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
21229 compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
21230 REG_PINVOKE_SCRATCH);
21232 // Set the GSCookie
21233 GSCookie gsCookie, *pGSCookie;
21234 compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
21235 noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
21237 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
21238 pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);
21240 /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
21241 set next field in frame */
21243 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
21244 pInfo->offsetOfThreadFrame);
21245 regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
21247 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
21248 compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
21250 noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
21252 /* set EBP value in frame */
21253 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
21254 compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
21256 /* reset track field in frame */
21257 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
21258 pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);
21260 /* get address of our frame */
21262 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
21263 pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
21264 regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
21266 /* now "push" our N/direct frame */
21268 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
21269 pInfo->offsetOfThreadFrame);
21274 /*****************************************************************************
21275 * Unchain the InlinedCallFrame.
21276 * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
21279 void CodeGen::genPInvokeMethodEpilog()
21281 if (compiler->opts.ShouldUsePInvokeHelpers())
21284 noway_assert(compiler->info.compCallUnmanaged);
21285 noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
21286 noway_assert(compiler->compCurBB == compiler->genReturnBB ||
21287 (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
21288 (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
21290 CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
21291 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21293 getEmitter()->emitDisableRandomNops();
21294 // debug check to make sure that we're not using ESI and/or EDI across this call, except for
21295 // compLvFrameListRoot.
21296 unsigned regTrashCheck = 0;
21298 /* XXX Tue 5/29/2007
21299 * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code
21300 * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
21302 LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
21304 regNumber reg2 = REG_PINVOKE_FRAME;
21307 // Two cases for epilog invocation:
21310 // We can trash the ESI/EDI registers.
21313 // When tail called, we'd like to preserve enregistered args,
21314 // in ESI/EDI so we can pass it to the callee.
21316 // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
21317 // Instead use the reserved local variable slot.
21319 if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
21321 if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
21323 #if FEATURE_FIXED_OUT_ARGS
21324 // Save the register in the reserved local var slot.
21325 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
21326 compiler->lvaPInvokeFrameRegSaveVar, 0);
21328 inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
21331 if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
21333 #if FEATURE_FIXED_OUT_ARGS
21334 // Save the register in the reserved local var slot.
21335 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
21336 compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
21338 inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
21343 if (varDsc->lvRegister)
21345 reg = varDsc->lvRegNum;
21347 reg2 = REG_PINVOKE_TCB;
21349 regTrashCheck |= genRegMask(reg2);
21353 /* mov esi, [tcb address] */
21355 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
21357 regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
21358 reg = REG_PINVOKE_TCB;
21360 regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
21363 /* mov edi, [ebp-frame.next] */
21365 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
21366 pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
21367 regTracker.rsTrackRegTrash(reg2);
21369 /* mov [esi+offsetOfThreadFrame], edi */
21371 getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);
21373 noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
21375 if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
21376 compiler->lvaTable[compiler->genReturnLocal].lvRegister)
21378 // really make sure we're not clobbering compiler->genReturnLocal.
21380 !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
21381 ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
21384 (void)regTrashCheck;
21386 // Restore the registers ESI and EDI.
21387 if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
21389 if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
21391 #if FEATURE_FIXED_OUT_ARGS
21392 // Restore the register from the reserved local var slot.
21393 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
21394 compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
21396 inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
21398 regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
21400 if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
21402 #if FEATURE_FIXED_OUT_ARGS
21403 // Restore the register from the reserved local var slot.
21404 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
21405 compiler->lvaPInvokeFrameRegSaveVar, 0);
21407 inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
21409 regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
21412 getEmitter()->emitEnableRandomNops();
21415 /*****************************************************************************
21416 This function emits the call-site prolog for direct calls to unmanaged code.
21417 It does all the necessary setup of the InlinedCallFrame.
21418 frameListRoot specifies the local containing the thread control block.
21419 argSize or methodToken is the value to be copied into the m_datum
21420 field of the frame (methodToken may be indirected & have a reloc)
21421 The function returns the register now containing the thread control block,
21422 (it could be either enregistered or loaded into one of the scratch registers)
21425 regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
21427 CORINFO_METHOD_HANDLE methodToken,
21428 BasicBlock* returnLabel)
21430 // Some stack locals might be 'cached' in registers, we need to trash them
21431 // from the regTracker *and* also ensure the gc tracker does not consider
21432 // them live (see the next assert). However, they might be live reg vars
21433 // that are non-pointers CSE'd from pointers.
21434 // That means the register will be live in rsMaskVars, so we can't just
21435 // call gcMarkSetNpt().
21437 regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
21438 gcInfo.gcRegGCrefSetCur &= ~deadRegs;
21439 gcInfo.gcRegByrefSetCur &= ~deadRegs;
21442 deadRegs &= regSet.rsMaskVars;
21445 for (LclVarDsc* varDsc = compiler->lvaTable;
21446 ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
21448 if (!varDsc->lvTracked || !varDsc->lvRegister)
21451 if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
21454 regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
21455 if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
21456 varRegMask |= genRegMask(varDsc->lvOtherReg);
21458 if (varRegMask & deadRegs)
21460 // We found the enregistered var that should not be live if it
21461 // was a GC pointer.
21462 noway_assert(!varTypeIsGC(varDsc));
21463 deadRegs &= ~varRegMask;
21470 /* Since we are using the InlinedCallFrame, we should have spilled all
21471 GC pointers to it - even from callee-saved registers */
21473 noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
21475 /* must specify only one of these parameters */
21476 noway_assert((argSize == 0) || (methodToken == NULL));
21478 /* We are about to call unmanaged code directly.
21479 Before we can do that we have to emit the following sequence:
21481 mov dword ptr [frame.callTarget], MethodToken
21482 mov dword ptr [frame.callSiteTracker], esp
21483 mov reg, dword ptr [tcb_address]
21484 mov byte ptr [tcb+offsetOfGcState], 0
21488 CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
21490 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21492 #ifdef _TARGET_ARM_
21493 if (compiler->opts.ShouldUsePInvokeHelpers())
21496 int adr = compiler->lvaFrameAddress(compiler->lvaInlinedPInvokeFrameVar, false, &baseReg, 0);
21498 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, baseReg, adr);
21499 genEmitHelperCall(CORINFO_HELP_JIT_PINVOKE_BEGIN,
21501 EA_UNKNOWN); // retSize
21502 regTracker.rsTrackRegTrash(REG_ARG_0);
21507 /* mov dword ptr [frame.callSiteTarget], value */
21509 if (methodToken == NULL)
21511 /* mov dword ptr [frame.callSiteTarget], argSize */
21512 instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
21513 pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21517 void *embedMethHnd, *pEmbedMethHnd;
21519 embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);
21521 noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
21523 if (embedMethHnd != NULL)
21525 /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */
21527 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
21528 compiler->lvaInlinedPInvokeFrameVar,
21529 pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21533 /* mov reg, dword ptr [MethodDescIndir]
21534 mov dword ptr [frame.callSiteTarget], reg */
21536 regNumber reg = regSet.rsPickFreeReg();
21538 #if CPU_LOAD_STORE_ARCH
21539 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
21540 getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
21541 #else // !CPU_LOAD_STORE_ARCH
21542 getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
21543 #endif // !CPU_LOAD_STORE_ARCH
21544 regTracker.rsTrackRegTrash(reg);
21545 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
21546 pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
21550 regNumber tcbReg = REG_NA;
21552 if (frameListRoot->lvRegister)
21554 tcbReg = frameListRoot->lvRegNum;
21558 tcbReg = regSet.rsGrabReg(RBM_ALLINT);
21560 /* mov reg, dword ptr [tcb address] */
21562 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
21563 (unsigned)(frameListRoot - compiler->lvaTable), 0);
21564 regTracker.rsTrackRegTrash(tcbReg);
21567 #ifdef _TARGET_X86_
21568 /* mov dword ptr [frame.callSiteTracker], esp */
21570 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
21571 pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
21572 #endif // _TARGET_X86_
21574 #if CPU_LOAD_STORE_ARCH
21575 regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
21576 getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
21577 regTracker.rsTrackRegTrash(tmpReg);
21578 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
21579 pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
21580 #else // !CPU_LOAD_STORE_ARCH
21581 /* mov dword ptr [frame.callSiteReturnAddress], label */
21583 getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
21584 pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
21585 #endif // !CPU_LOAD_STORE_ARCH
21587 #if CPU_LOAD_STORE_ARCH
21588 instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
21590 noway_assert(tmpReg != tcbReg);
21592 getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
21593 #else // !CPU_LOAD_STORE_ARCH
21594 /* mov byte ptr [tcbReg+offsetOfGcState], 0 */
21596 getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
21597 #endif // !CPU_LOAD_STORE_ARCH
21602 /*****************************************************************************
21604 First we have to mark in the hoisted NDirect stub that we are back
21605 in managed code. Then we have to check (a global flag) whether GC is
21606 pending or not. If so, we just call into a jit-helper.
21607 Right now we have this call always inlined, i.e. we always skip around
21608 the jit-helper call.
21610 The tcb address is a regular local (initialized in the prolog), so it is either
21611 enregistered or in the frame:
21613 tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
21614 mov byte ptr[tcb_reg+offsetOfGcState], 1
21615 cmp 'global GC pending flag', 0
21617 [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC
21618 [mov [EBP+spill_area+0], eax] ; spill the int return value if any
21619 [mov [EBP+spill_area+4], edx] ; spill the long return value if any
21621 [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any
21622 [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any
21626 void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
21628 #ifdef _TARGET_ARM_
21629 if (compiler->opts.ShouldUsePInvokeHelpers())
21631 noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
21634 int adr = compiler->lvaFrameAddress(compiler->lvaInlinedPInvokeFrameVar, false, &baseReg, 0);
21636 getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, baseReg, adr);
21637 genEmitHelperCall(CORINFO_HELP_JIT_PINVOKE_END,
21639 EA_UNKNOWN); // retSize
21640 regTracker.rsTrackRegTrash(REG_ARG_0);
21645 BasicBlock* clab_nostop;
21646 CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
21650 #ifdef _TARGET_ARM_
21656 getEmitter()->emitDisableRandomNops();
21658 if (frameListRoot->lvRegister)
21660 /* make sure that register is live across the call */
21662 reg2 = frameListRoot->lvRegNum;
21663 noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
21667 /* mov reg2, dword ptr [tcb address] */
21668 CLANG_FORMAT_COMMENT_ANCHOR;
21670 #ifdef _TARGET_ARM_
21676 getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
21677 (unsigned)(frameListRoot - compiler->lvaTable), 0);
21678 regTracker.rsTrackRegTrash(reg2);
21681 #ifdef _TARGET_ARM_
21683 /* strb [r2+offsetOfGcState], r3 */
21684 instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
21685 getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
21687 /* mov byte ptr [tcb+offsetOfGcState], 1 */
21688 getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);
21691 /* test global flag (we return to managed code) */
21693 LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
21695 addrOfCaptureThreadGlobal =
21696 compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
21697 noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
21699 // Can we directly use addrOfCaptureThreadGlobal?
21701 if (addrOfCaptureThreadGlobal)
21703 #ifdef _TARGET_ARM_
21704 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
21705 getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
21706 regTracker.rsTrackRegTrash(reg3);
21707 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
21709 getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
21714 #ifdef _TARGET_ARM_
21715 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
21716 getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
21717 regTracker.rsTrackRegTrash(reg3);
21718 getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
21719 getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
21720 #else // !_TARGET_ARM_
21722 getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
21723 (ssize_t)pAddrOfCaptureThreadGlobal);
21724 regTracker.rsTrackRegTrash(REG_ECX);
21726 getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
21728 #endif // !_TARGET_ARM_
21732 clab_nostop = genCreateTempLabel();
21734 /* Generate the conditional jump */
21735 emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
21736 inst_JMP(jmpEqual, clab_nostop);
21738 #ifdef _TARGET_ARM_
21739 // The helper preserves the return value on ARM
21741 /* save return value (if necessary) */
21742 if (retVal != RBM_NONE)
21744 if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
21748 inst_RV(INS_push, REG_INTRET, TYP_INT);
21750 if (retVal == RBM_LNGRET)
21754 inst_RV(INS_push, REG_EDX, TYP_INT);
21760 /* emit the call to the EE-helper that stops for GC (or other reasons) */
21762 genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
21763 EA_UNKNOWN); /* retSize */
21765 #ifdef _TARGET_ARM_
21766 // The helper preserves the return value on ARM
21768 /* restore return value (if necessary) */
21770 if (retVal != RBM_NONE)
21772 if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
21774 if (retVal == RBM_LNGRET)
21778 inst_RV(INS_pop, REG_EDX, TYP_INT);
21779 regTracker.rsTrackRegTrash(REG_EDX);
21784 inst_RV(INS_pop, REG_INTRET, TYP_INT);
21785 regTracker.rsTrackRegTrash(REG_INTRET);
21790 /* genCondJump() closes the current emitter block */
21792 genDefineTempLabel(clab_nostop);
21794 // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with
21795 // the above code. So the process is:
21796 // 1) Return to cooperative mode
21797 // 2) Check to see if we need to stop for GC
21798 // 3) Return from the p/invoke (as far as the stack walker is concerned).
21800 /* mov dword ptr [frame.callSiteTracker], 0 */
21802 instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
21803 pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
21805 getEmitter()->emitEnableRandomNops();
21808 /*****************************************************************************/
21810 /*****************************************************************************
21811 * TRACKING OF FLAGS
21812 *****************************************************************************/
21814 void CodeGen::genFlagsEqualToNone()
21816 genFlagsEqReg = REG_NA;
21817 genFlagsEqVar = (unsigned)-1;
21818 genFlagsEqLoc.Init();
21821 /*****************************************************************************
21823 * Record the fact that the flags register has a value that reflects the
21824 * contents of the given register.
21827 void CodeGen::genFlagsEqualToReg(GenTree* tree, regNumber reg)
21829 genFlagsEqLoc.CaptureLocation(getEmitter());
21830 genFlagsEqReg = reg;
21832 /* previous setting of flags by a var becomes invalid */
21834 genFlagsEqVar = 0xFFFFFFFF;
21836 /* Set appropriate flags on the tree */
21840 tree->gtFlags |= GTF_ZSF_SET;
21841 assert(tree->gtSetFlags());
21845 /*****************************************************************************
21847 * Record the fact that the flags register has a value that reflects the
21848 * contents of the given local variable.
21851 void CodeGen::genFlagsEqualToVar(GenTree* tree, unsigned var)
21853 genFlagsEqLoc.CaptureLocation(getEmitter());
21854 genFlagsEqVar = var;
21856 /* previous setting of flags by a register becomes invalid */
21858 genFlagsEqReg = REG_NA;
21860 /* Set appropriate flags on the tree */
21864 tree->gtFlags |= GTF_ZSF_SET;
21865 assert(tree->gtSetFlags());
21869 /*****************************************************************************
21871 * Return an indication of whether the flags register is set to the current
21872 * value of the given register/variable. The return value is as follows:
 *  true .. the zero flag (ZF) and the sign flag (SF) are set
21878 bool CodeGen::genFlagsAreReg(regNumber reg)
21880 if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
21888 bool CodeGen::genFlagsAreVar(unsigned var)
21890 if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
21898 /*****************************************************************************
21899 * This utility function returns true iff the execution path from "from"
21900 * (inclusive) to "to" (exclusive) contains a death of the given var
21902 bool CodeGen::genContainsVarDeath(GenTree* from, GenTree* to, unsigned varNum)
21905 for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
21907 if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
21909 unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
21910 if (dyingVarNum == varNum)
21912 LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
21913 if (varDsc->lvPromoted)
21915 assert(varDsc->lvType == TYP_STRUCT);
21916 unsigned firstFieldNum = varDsc->lvFieldLclStart;
21917 if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
21924 assert(tree != NULL);
21928 #endif // LEGACY_BACKEND