genCodeForTree_DONE(tree, reg);
}
-
-BasicBlock dummyBB;
-
-#ifdef _PREFAST_
-#pragma warning(push)
-#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
-#endif
-void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree,
- regMaskTP destReg,
- regMaskTP bestReg)
+void CodeGen::genCodeForBlkOp(GenTreePtr tree,
+ regMaskTP destReg)
{
- const genTreeOps oper = tree->OperGet();
- const var_types treeType = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
GenTreePtr op1 = tree->gtOp.gtOp1;
GenTreePtr op2 = tree->gtGetOp2();
- regNumber reg = DUMMY_INIT(REG_CORRUPT);
- regMaskTP regs = regSet.rsMaskUsed;
regMaskTP needReg = destReg;
- insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- emitAttr size;
- instruction ins;
- regMaskTP addrReg;
+ regMaskTP regs = regSet.rsMaskUsed;
GenTreePtr opsPtr[3];
regMaskTP regsPtr[3];
-#ifdef DEBUG
- addrReg = 0xDEADCAFE;
+ noway_assert(oper == GT_COPYBLK || oper == GT_INITBLK);
+ noway_assert(op1->IsList());
+
+#ifdef _TARGET_ARM_
+ if (tree->AsBlkOp()->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the InitBlk/CopyBlk
+ instGen_MemoryBarrier();
+ }
#endif
+ {
+ GenTreePtr destPtr, srcPtrOrVal;
+ destPtr = op1->gtOp.gtOp1;
+ srcPtrOrVal = op1->gtOp.gtOp2;
+ noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
+ noway_assert((oper == GT_COPYBLK &&
+ (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet())))
+ ||
+ (oper == GT_INITBLK &&
+ varTypeIsIntegral(srcPtrOrVal->TypeGet())));
- noway_assert(tree->OperKind() & GTK_SMPOP);
+ noway_assert(op1 && op1->IsList());
+ noway_assert(destPtr && srcPtrOrVal);
- switch (oper)
- {
- case GT_ASG:
- genCodeForTreeSmpOpAsg(tree);
- return;
+#if CPU_USES_BLOCK_MOVE
+ regs = (oper == GT_INITBLK) ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
- case GT_ASG_LSH:
- case GT_ASG_RSH:
- case GT_ASG_RSZ:
- genCodeForAsgShift(tree, destReg, bestReg);
- return;
+ /* Some special code for block moves/inits for constant sizes */
- case GT_ASG_AND:
- case GT_ASG_OR :
- case GT_ASG_XOR:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
- return;
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
+ //
+ if ((op2->IsCnsIntOrI()) &&
+ ((oper == GT_COPYBLK) || (srcPtrOrVal->IsCnsIntOrI())))
+ {
+ size_t length = (size_t)op2->gtIntCon.gtIconVal;
+ size_t initVal = 0;
+ instruction ins_P, ins_PR, ins_B;
- case GT_CHS:
- addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
-#ifdef _TARGET_XARCH_
- // Note that the specialCase here occurs when the treeType specifies a byte sized operation
- // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
- //
- bool specialCase; specialCase = false;
- if (op1->gtOper == GT_REG_VAR)
+ if (oper == GT_INITBLK)
{
- /* Get hold of the target register */
+ ins_P = INS_stosp;
+ ins_PR = INS_r_stosp;
+ ins_B = INS_stosb;
- reg = op1->gtRegVar.gtRegNum;
- if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+ /* Properly extend the init constant from a U1 to a U4 */
+ initVal = 0xFF & ((unsigned)op1->gtOp.gtOp2->gtIntCon.gtIconVal);
- inst_RV_RV(INS_mov, byteReg, reg);
- regTracker.rsTrackRegTrash(byteReg);
+ /* If it is a non-zero value we have to replicate */
+ /* the byte value four times to form the DWORD */
+ /* Then we change this new value into the tree-node */
- inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
- var_types op1Type = op1->TypeGet();
- instruction wideningIns = ins_Move_Extend(op1Type, true);
- inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
- regTracker.rsTrackRegTrash(reg);
- specialCase = true;
+ if (initVal)
+ {
+ initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ initVal = initVal | (initVal << 32);
+ op1->gtOp.gtOp2->gtType = TYP_LONG;
+ }
+ else
+ {
+ op1->gtOp.gtOp2->gtType = TYP_INT;
+ }
+#endif // _TARGET_64BIT_
}
- }
-
- if (!specialCase)
- {
- inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
- }
-#else // not _TARGET_XARCH_
- if (op1->gtFlags & GTF_REG_VAL)
- {
- inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
+ op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
}
else
{
- // Fix 388382 ARM JitStress WP7
- var_types op1Type = op1->TypeGet();
- regNumber reg = regSet.rsPickFreeReg();
- inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
- regTracker.rsTrackRegTrash(reg);
- inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
- inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
+ ins_P = INS_movsp;
+ ins_PR = INS_r_movsp;
+ ins_B = INS_movsb;
}
-#endif
- if (op1->gtFlags & GTF_REG_VAL)
- regTracker.rsTrackRegTrash(op1->gtRegNum);
- genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
-
- genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
- return;
-
- case GT_AND:
- case GT_OR :
- case GT_XOR:
- case GT_ADD:
- case GT_SUB:
- case GT_MUL:
- genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
- return;
- case GT_UMOD:
- genCodeForUnsignedMod(tree, destReg, bestReg);
- return;
+ // Determine if we will be using SSE2
+ unsigned movqLenMin = 8;
+ unsigned movqLenMax = 24;
- case GT_MOD:
- genCodeForSignedMod(tree, destReg, bestReg);
- return;
+ bool bWillUseSSE2 = false;
+ bool bWillUseOnlySSE2 = false;
+ bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
- case GT_UDIV:
- genCodeForUnsignedDiv(tree, destReg, bestReg);
- return;
+#ifdef _TARGET_64BIT_
- case GT_DIV:
- genCodeForSignedDiv(tree, destReg, bestReg);
- return;
+ // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
+ // there is no point in wasting space on the bigger instructions
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- genCodeForShift(tree, destReg, bestReg);
- return;
+#else // !_TARGET_64BIT_
- case GT_NEG:
- case GT_NOT:
+ if (compiler->opts.compCanUseSSE2)
+ {
+ unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
- /* Generate the operand into some register */
+ /* Adjust for BB weight */
+ if (curBBweight == BB_ZERO_WEIGHT)
+ {
+ // Don't bother with this optimization in
+ // rarely run blocks
+ movqLenMax = movqLenMin = 0;
+ }
+ else if (curBBweight < BB_UNITY_WEIGHT)
+ {
+ // Be less aggressive when we are inside a conditional
+ movqLenMax = 16;
+ }
+ else if (curBBweight >= (BB_LOOP_WEIGHT*BB_UNITY_WEIGHT) / 2)
+ {
+ // Be more aggressive when we are inside a loop
+ movqLenMax = 48;
+ }
- genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
+ if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || (oper == GT_INITBLK))
+ {
+ // Be more aggressive when optimizing for speed
+ // InitBlk uses fewer instructions
+ movqLenMax += 16;
+ }
- reg = op1->gtRegNum;
+ if (compiler->compCodeOpt() != Compiler::SMALL_CODE &&
+ length >= movqLenMin &&
+ length <= movqLenMax)
+ {
+ bWillUseSSE2 = true;
- /* Negate/reverse the value in the register */
+ if ((length % 8) == 0)
+ {
+ bWillUseOnlySSE2 = true;
+ if (oper == GT_INITBLK && (initVal == 0))
+ {
+ bNeedEvaluateCnst = false;
+ noway_assert((op1->gtOp.gtOp2->OperGet() == GT_CNS_INT));
+ }
+ }
+ }
+ }
- inst_RV((oper == GT_NEG) ? INS_NEG
- : INS_NOT, reg, treeType);
+#endif // !_TARGET_64BIT_
- /* The register is now trashed */
+ const bool bWillTrashRegSrc = ((oper == GT_COPYBLK) && !bWillUseOnlySSE2);
+ /* Evaluate dest and src/val */
- regTracker.rsTrackRegTrash(reg);
+ if (op1->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (bNeedEvaluateCnst)
+ {
+ genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
+ }
+ genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
+ if (bNeedEvaluateCnst)
+ {
+ genRecoverReg(op1->gtOp.gtOp2, regs, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
+ if (bNeedEvaluateCnst)
+ {
+ genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
+ }
+ genRecoverReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::KEEP_REG);
+ }
- genCodeForTree_DONE(tree, reg);
- return;
+ bool bTrashedESI = false;
+ bool bTrashedEDI = false;
- case GT_IND:
- case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
+ if (bWillUseSSE2)
+ {
+ int blkDisp = 0;
+ regNumber xmmReg = REG_XMM0;
- /* Make sure the operand is addressable */
+ if (oper == GT_INITBLK)
+ {
+ if (initVal)
+ {
+ getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
+ getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
+ }
+ }
- addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
+ JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
+ length, (oper == GT_INITBLK) ? "initblk" : "copyblk", compiler->info.compFullName));
- genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ while (length > 7)
+ {
+ if (oper == GT_INITBLK)
+ {
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
+ }
+ blkDisp += 8;
+ length -= 8;
+ }
- /* Figure out the size of the value being loaded */
+ if (length > 0)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(!bWillUseOnlySSE2);
- size = EA_ATTR(genTypeSize(tree->gtType));
+ if (oper == GT_COPYBLK)
+ {
+ inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
+ bTrashedESI = true;
+ }
- /* Pick a register for the value */
+ inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
+ bTrashedEDI = true;
- if (needReg == RBM_ALLINT && bestReg == 0)
+ if (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
+ }
+ }
+ else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
{
- /* Absent a better suggestion, pick a useless register */
+ /* For small code, we can only use ins_DR to generate fast
+ and small code. We also can't use "rep movsb" because
+ we may not be reading and writing the DWORD atomically */
- bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
+ noway_assert(bNeedEvaluateCnst);
+
+ goto USE_DR;
}
+ else if (length <= 4 * REGSIZE_BYTES)
+ {
+ noway_assert(bNeedEvaluateCnst);
- reg = regSet.rsPickReg(needReg, bestReg);
+ while (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
- if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
- {
- noway_assert(size == EA_PTRSIZE);
- getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL),
- EA_PTRSIZE,
- reg,
- FLD_GLOBAL_FS,
- (int)op1->gtIntCon.gtIconVal);
+ bTrashedEDI = true;
+ if (oper == GT_COPYBLK)
+ bTrashedESI = true;
}
else
{
- /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
+ USE_DR:
+ noway_assert(bNeedEvaluateCnst);
- inst_mov_RV_ST(reg, tree);
- }
+ /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
+ genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
-#ifdef _TARGET_ARM_
- if (tree->gtFlags & GTF_IND_VOLATILE)
- {
- // Emit a memory barrier instruction after the load
- instGen_MemoryBarrier();
- }
-#endif
+ length &= (REGSIZE_BYTES - 1);
- /* Note the new contents of the register we used */
+ instGen(ins_PR);
- regTracker.rsTrackRegTrash(reg);
+ regTracker.rsTrackRegTrash(REG_ECX);
- /* Update the live set of register variables */
+ bTrashedEDI = true;
+ if (oper == GT_COPYBLK)
+ bTrashedESI = true;
+ }
-#ifdef DEBUG
- if (compiler->opts.varNames) genUpdateLife(tree);
-#endif
+ /* Now take care of the remainder */
- /* Now we can update the register pointer information */
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(length < 8);
-// genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
- gcInfo.gcMarkRegPtrVal(reg, treeType);
+ instGen((oper == GT_INITBLK) ? INS_stosd : INS_movsd);
+ length -= 4;
- genCodeForTree_DONE_LIFE(tree, reg);
- return;
+ bTrashedEDI = true;
+ if (oper == GT_COPYBLK)
+ bTrashedESI = true;
+ }
- case GT_CAST:
+#endif // _TARGET_64BIT_
- genCodeForNumericCast(tree, destReg, bestReg);
- return;
+ if (length)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ while (length--)
+ {
+ instGen(ins_B);
+ }
- case GT_JTRUE:
+ bTrashedEDI = true;
+ if (oper == GT_COPYBLK)
+ bTrashedESI = true;
+ }
- /* Is this a test of a relational operator? */
+ noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
+ if (bTrashedEDI)
+ regTracker.rsTrackRegTrash(REG_EDI);
+ if (bTrashedESI)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
- if (op1->OperIsCompare())
- {
- /* Generate the conditional jump */
+ genReleaseReg(op1->gtOp.gtOp1);
+ if (bNeedEvaluateCnst) genReleaseReg(op1->gtOp.gtOp2);
- genCondJump(op1);
+ }
+ else
+ {
+ //
+ // This a variable-sized COPYBLK/INITBLK,
+ // or a fixed size INITBLK with a variable init value,
+ //
- genUpdateLife(tree);
- return;
- }
+ // What order should the Dest, Val/Src, and Size be calculated?
-#ifdef DEBUG
- compiler->gtDispTree(tree);
-#endif
- NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
- break;
+ compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX,
+ opsPtr, regsPtr); // OUT arguments
- case GT_SWITCH:
- genCodeForSwitch(tree);
- return;
+ noway_assert(((oper == GT_INITBLK) && (regs == RBM_EAX)) || ((oper == GT_COPYBLK) && (regs == RBM_ESI)));
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
- case GT_RETFILT:
- noway_assert(tree->gtType == TYP_VOID || op1 != 0);
- if (op1 == 0) // endfinally
- {
- reg = REG_NA;
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
-#ifdef _TARGET_XARCH_
- /* Return using a pop-jmp sequence. As the "try" block calls
- the finally with a jmp, this leaves the x86 call-ret stack
- balanced in the normal flow of path. */
+ noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
+ (op1->gtOp.gtOp1->gtRegNum == REG_EDI));
- noway_assert(isFramePointerRequired());
- inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
- inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
-#elif defined(_TARGET_ARM_)
- // Nothing needed for ARM
-#else
- NYI("TARGET");
+ noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
+ (genRegMask(op1->gtOp.gtOp2->gtRegNum) == regs));
+
+ noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size
+ (op2->gtRegNum == REG_ECX));
+
+ if (oper == GT_INITBLK)
+ instGen(INS_r_stosb);
+ else
+ instGen(INS_r_movsb);
+
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ if (oper == GT_COPYBLK)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ genReleaseReg(opsPtr[2]);
+ }
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+ // Currently only the ARM implementation is provided
+#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
#endif
- }
- else // endfilter
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
+ //
+ if ((op2->OperGet() == GT_CNS_INT) &&
+ ((oper == GT_COPYBLK) || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
+ {
+ GenTreePtr dstOp = op1->gtOp.gtOp1;
+ GenTreePtr srcOp = op1->gtOp.gtOp2;
+ unsigned length = (unsigned)op2->gtIntCon.gtIconVal;
+ unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
+ unsigned initVal = 0;
+ bool useLoop = false;
+
+ if (oper == GT_INITBLK)
{
- genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegNum == REG_INTRET);
- /* The return value has now been computed */
- reg = op1->gtRegNum;
+ /* Properly extend the init constant from a U1 to a U4 */
+ initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
- /* Return */
- instGen_Return(0);
+ /* If it is a non-zero value we have to replicate */
+ /* the byte value four times to form the DWORD */
+ /* Then we store this new value into the tree-node */
+
+ if (initVal != 0)
+ {
+ initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+ op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
+ }
}
- genCodeForTree_DONE(tree, reg);
- return;
+ // Will we be using a loop to implement this INITBLK/COPYBLK?
+ if (((oper == GT_COPYBLK) && (fullStoreCount >= 8)) ||
+ ((oper == GT_INITBLK) && (fullStoreCount >= 16)))
+ {
+ useLoop = true;
+ }
- case GT_RETURN:
+ regMaskTP usedRegs;
+ regNumber regDst;
+ regNumber regSrc;
+ regNumber regTemp;
-#if INLINE_NDIRECT
+ /* Evaluate dest and src/val */
- // TODO: this should be done AFTER we called exit mon so that
- // we are sure that we don't have to keep 'this' alive
+ if (op1->gtFlags & GTF_REVERSE_OPS)
+ {
+ genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
- if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+ regDst = dstOp->gtRegNum;
+
+ genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
+ regSrc = srcOp->gtRegNum;
+ }
+ else
{
- /* either it's an "empty" statement or the return statement
- of a synchronized method
- */
+ genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(dstOp->gtFlags & GTF_REG_VAL);
- genPInvokeMethodEpilog();
+ genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+ regSrc = srcOp->gtRegNum;
+
+ genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
+ regDst = dstOp->gtRegNum;
}
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
-#endif
+ regDst = dstOp->gtRegNum;
+ regSrc = srcOp->gtRegNum;
+ usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
+ bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr srcType;
- /* Is there a return value and/or an exit statement? */
+ if (oper == GT_COPYBLK)
+ {
+ // Prefer a low register, but avoid one of the ones we've already grabbed
+ regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ usedRegs |= genRegMask(regTemp);
+ bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
+ srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ }
+ else
+ {
+ regTemp = REG_STK;
+ srcType = EA_PTRSIZE;
+ }
- if (op1)
+ instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
+ instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
+
+ int finalOffset;
+
+ // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
+ if (!useLoop)
{
- if (op1->gtType == TYP_VOID)
+ for (unsigned i = 0; i < fullStoreCount; i++)
{
- //We're returning nothing, just generate the block (shared epilog calls).
- genCodeForTree(op1, 0);
- }
-#ifdef _TARGET_ARM_
- else if (op1->gtType == TYP_STRUCT)
- {
- if (op1->gtOper == GT_CALL)
+ if (oper == GT_COPYBLK)
{
- // We have a return call() because we failed to tail call.
- // In any case, just generate the call and be done.
- assert(compiler->IsHfa(op1));
- genCodeForCall(op1, true);
- genMarkTreeInReg(op1, REG_FLOATRET);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
}
else
{
- assert(op1->gtOper == GT_LCL_VAR);
- assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
- genLoadIntoFltRetRegs(op1);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
}
}
- else if (op1->TypeGet() == TYP_FLOAT)
- {
- // This can only occur when we are returning a non-HFA struct
- // that is composed of a single float field and we performed
- // struct promotion and enregistered the float field.
- //
- genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
- getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
- }
-#endif // _TARGET_ARM_
- else
- {
- //we can now go through this code for compiler->genReturnBB. I've regularized all the code.
- //noway_assert(compiler->compCurBB != compiler->genReturnBB);
-
- noway_assert(op1->gtType != TYP_VOID);
+ finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
+ length -= finalOffset;
+ }
+ else // We will use a loop to implement this INITBLK/COPYBLK
+ {
+ unsigned pairStoreLoopCount = fullStoreCount / 2;
- /* Generate the return value into the return register */
+ // We need a second temp register for CopyBlk
+ regNumber regTemp2 = REG_STK;
+ if (oper == GT_COPYBLK)
+ {
+ // Prefer a low register, but avoid one of the ones we've already grabbed
+ regTemp2 = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ usedRegs |= genRegMask(regTemp2);
+ }
- genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
+ // Pick and initialize the loop counter register
+ regNumber regLoopIndex;
+ regLoopIndex = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
- /* The result must now be in the return register */
+ // Create and define the Basic Block for the loop top
+ BasicBlock * loopTopBlock = genCreateTempLabel();
+ genDefineTempLabel(loopTopBlock);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
- noway_assert(op1->gtRegNum == REG_INTRET);
+ // The loop body
+ if (oper == GT_COPYBLK)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
+ regTracker.rsTrackRegTrash(regSrc);
+ regTracker.rsTrackRegTrash(regTemp);
+ regTracker.rsTrackRegTrash(regTemp2);
+ }
+ else // GT_INITBLK
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
}
- /* The return value has now been computed */
+ getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regDst);
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
+ emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
+ inst_JMP(jmpGTS, loopTopBlock);
- reg = op1->gtRegNum;
+ regTracker.rsTrackRegIntCns(regLoopIndex, 0);
- genCodeForTree_DONE(tree, reg);
+ length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
+ if (length & TARGET_POINTER_SIZE)
+ {
+ if (oper == GT_COPYBLK)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
+ }
+ finalOffset = TARGET_POINTER_SIZE;
+ length -= TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ finalOffset = 0;
+ }
}
- //The profiling hook does not trash registers, so it's safe to call after we emit the code for
- //the GT_RETURN tree.
-#ifdef PROFILING_SUPPORTED
- if (compiler->compCurBB == compiler->genReturnBB)
+ if (length & sizeof(short))
{
- genProfilingLeaveCallback();
- }
-#endif
-#ifdef DEBUG
- if (compiler->opts.compStackCheckOnRet)
- {
- noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
- compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
- getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ loadIns = ins_Load(TYP_USHORT); // INS_ldrh
+ storeIns = ins_Store(TYP_USHORT); // INS_strh
- BasicBlock * esp_check = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, esp_check);
- getEmitter()->emitIns(INS_BREAKPOINT);
- genDefineTempLabel(esp_check);
+ if (oper == GT_COPYBLK)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(short);
+ finalOffset += sizeof(short);
}
-#endif
- return;
-
- case GT_COMMA:
- if (tree->gtFlags & GTF_REVERSE_OPS)
+ if (length & sizeof(char))
{
- if (tree->gtType == TYP_VOID)
+ loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
+ storeIns = ins_Store(TYP_UBYTE); // INS_strb
+
+ if (oper == GT_COPYBLK)
{
- genEvalSideEffects(op2);
- genUpdateLife (op2);
- genEvalSideEffects(op1);
- genUpdateLife(tree);
- return;
+ getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
}
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(char);
+ }
+ assert(length == 0);
- // Generate op2
- genCodeForTree(op2, needReg);
- genUpdateLife(op2);
-
- noway_assert(op2->gtFlags & GTF_REG_VAL);
-
- regSet.rsMarkRegUsed(op2);
-
- // Do side effects of op1
- genEvalSideEffects(op1);
+ genReleaseReg(dstOp);
+ genReleaseReg(srcOp);
+ }
+ else
+ {
+ //
+ // This a variable-sized COPYBLK/INITBLK,
+ // or a fixed size INITBLK with a variable init value,
+ //
- // Recover op2 if spilled
- genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
+ // What order should the Dest, Val/Src, and Size be calculated?
- regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
+ compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2,
+ opsPtr, regsPtr); // OUT arguments
- // set gc info if we need so
- gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
- genUpdateLife(tree);
- genCodeForTree_DONE(tree, op2->gtRegNum);
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
- return;
- }
- else
- {
- noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+ noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
+ (op1->gtOp.gtOp1->gtRegNum == REG_ARG_0));
- /* Generate side effects of the first operand */
+ noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
+ (op1->gtOp.gtOp2->gtRegNum == REG_ARG_1));
- genEvalSideEffects(op1);
- genUpdateLife (op1);
+ noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size
+ (op2->gtRegNum == REG_ARG_2));
- /* Is the value of the second operand used? */
+ regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
- if (tree->gtType == TYP_VOID)
- {
- /* The right operand produces no result. The morpher is
- responsible for resetting the type of GT_COMMA nodes
- to TYP_VOID if op2 isn't meant to yield a result. */
+ genEmitHelperCall(oper == GT_COPYBLK ? CORINFO_HELP_MEMCPY
+ /* GT_INITBLK */ : CORINFO_HELP_MEMSET,
+ 0, EA_UNKNOWN);
- genEvalSideEffects(op2);
- genUpdateLife(tree);
- return;
- }
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
- /* Generate the second operand, i.e. the 'real' value */
+ regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ genReleaseReg(opsPtr[2]);
+ }
- genCodeForTree(op2, needReg);
- noway_assert(op2->gtFlags & GTF_REG_VAL);
+ if ((oper == GT_COPYBLK) && tree->AsBlkOp()->IsVolatile())
+ {
+ // Emit a memory barrier instruction after the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif // !CPU_USES_BLOCK_MOVE
+ }
+}
+BasicBlock dummyBB;
- /* The result of 'op2' is also the final result */
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree,
+ regMaskTP destReg,
+ regMaskTP bestReg)
+{
+ const genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ emitAttr size;
+ instruction ins;
+ regMaskTP addrReg;
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
- reg = op2->gtRegNum;
+#ifdef DEBUG
+ addrReg = 0xDEADCAFE;
+#endif
- /* Remember whether we set the flags */
+ noway_assert(tree->OperKind() & GTK_SMPOP);
- tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
+ switch (oper)
+ {
+ case GT_ASG:
+ genCodeForTreeSmpOpAsg(tree);
+ return;
- genCodeForTree_DONE(tree, reg);
- return;
- }
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ genCodeForAsgShift(tree, destReg, bestReg);
+ return;
- case GT_BOX:
- genCodeForTree(op1, needReg);
- noway_assert(op1->gtFlags & GTF_REG_VAL);
+ case GT_ASG_AND:
+ case GT_ASG_OR :
+ case GT_ASG_XOR:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
+ return;
- /* The result of 'op1' is also the final result */
+ case GT_CHS:
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
+#ifdef _TARGET_XARCH_
+ // Note that the specialCase here occurs when the treeType specifies a byte sized operation
+ // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
+ //
+ bool specialCase; specialCase = false;
+ if (op1->gtOper == GT_REG_VAR)
+ {
+ /* Get hold of the target register */
- reg = op1->gtRegNum;
+ reg = op1->gtRegVar.gtRegNum;
+ if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
- /* Remember whether we set the flags */
+ inst_RV_RV(INS_mov, byteReg, reg);
+ regTracker.rsTrackRegTrash(byteReg);
- tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
+ inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
+ var_types op1Type = op1->TypeGet();
+ instruction wideningIns = ins_Move_Extend(op1Type, true);
+ inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
+ regTracker.rsTrackRegTrash(reg);
+ specialCase = true;
+ }
+ }
- genCodeForTree_DONE(tree, reg);
+ if (!specialCase)
+ {
+ inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
+ }
+#else // not _TARGET_XARCH_
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
+ }
+ else
+ {
+ // Fix 388382 ARM JitStress WP7
+ var_types op1Type = op1->TypeGet();
+ regNumber reg = regSet.rsPickFreeReg();
+ inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
+ regTracker.rsTrackRegTrash(reg);
+ inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
+ inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
+ }
+#endif
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
return;
- case GT_QMARK:
+ case GT_AND:
+ case GT_OR :
+ case GT_XOR:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
+ return;
- genCodeForQmark(tree, destReg, bestReg);
+ case GT_UMOD:
+ genCodeForUnsignedMod(tree, destReg, bestReg);
return;
- case GT_NOP:
+ case GT_MOD:
+ genCodeForSignedMod(tree, destReg, bestReg);
+ return;
-#if OPT_BOOL_OPS
- if (op1 == NULL)
- return;
-#endif
+ case GT_UDIV:
+ genCodeForUnsignedDiv(tree, destReg, bestReg);
+ return;
- /* Generate the operand into some register */
+ case GT_DIV:
+ genCodeForSignedDiv(tree, destReg, bestReg);
+ return;
- genCodeForTree(op1, needReg);
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ genCodeForShift(tree, destReg, bestReg);
+ return;
- /* The result is the same as the operand */
+ case GT_NEG:
+ case GT_NOT:
- reg = op1->gtRegNum;
+ /* Generate the operand into some register */
- genCodeForTree_DONE(tree, reg);
- return;
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
- case GT_INTRINSIC:
+ reg = op1->gtRegNum;
- switch (tree->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Round:
- {
- noway_assert(tree->gtType == TYP_INT);
+ /* Negate/reverse the value in the register */
-#if FEATURE_STACK_FP_X87
- genCodeForTreeFlt(op1);
+ inst_RV((oper == GT_NEG) ? INS_NEG
+ : INS_NOT, reg, treeType);
- /* Store the FP value into the temp */
- TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
+ /* The register is now trashed */
- FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
- FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
- inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
+ regTracker.rsTrackRegTrash(reg);
- reg = regSet.rsPickReg(needReg, bestReg);
- regTracker.rsTrackRegTrash(reg);
+ genCodeForTree_DONE(tree, reg);
+ return;
- inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
+ case GT_IND:
+ case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
- compiler->tmpRlsTemp(temp);
-#else
- genCodeForTreeFloat(tree, needReg, bestReg);
- return;
-#endif
- }
- break;
+ /* Make sure the operand is addressable */
- default:
- noway_assert(!"unexpected math intrinsic");
+ addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
- }
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
- genCodeForTree_DONE(tree, reg);
- return;
+ /* Figure out the size of the value being loaded */
- case GT_LCLHEAP:
+ size = EA_ATTR(genTypeSize(tree->gtType));
- reg = genLclHeap(op1);
- genCodeForTree_DONE(tree, reg);
- return;
+ /* Pick a register for the value */
- case GT_COPYOBJ:
- noway_assert(op1->IsList());
+ if (needReg == RBM_ALLINT && bestReg == 0)
+ {
+ /* Absent a better suggestion, pick a useless register */
- /* If the value class doesn't have any fields that are GC refs or
- the target isn't on the GC-heap, we can merge it with CPBLK.
- GC fields cannot be copied directly, instead we will
- need to use a jit-helper for that. */
- assert(tree->AsCpObj()->gtGcPtrCount > 0);
+ bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
+ }
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
{
- GenTreeCpObj* cpObjOp = tree->AsCpObj();
+ noway_assert(size == EA_PTRSIZE);
+ getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL),
+ EA_PTRSIZE,
+ reg,
+ FLD_GLOBAL_FS,
+ (int)op1->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
+
+ inst_mov_RV_ST(reg, tree);
+ }
#ifdef _TARGET_ARM_
- if (cpObjOp->IsVolatile())
- {
- // Emit a memory barrier instruction before the CopyBlk
- instGen_MemoryBarrier();
- }
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
#endif
- GenTreePtr srcObj = cpObjOp->Source();
- GenTreePtr dstObj = cpObjOp->Dest();
- noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
+ /* Note the new contents of the register we used */
-#ifdef DEBUG
- CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
- size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(reg);
- // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
- // The EE currently does not allow this. Let's assert it just to be safe.
- noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
+ /* Update the live set of register variables */
+
+#ifdef DEBUG
+ if (compiler->opts.varNames) genUpdateLife(tree);
#endif
- size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
- unsigned slots = cpObjOp->gtSlots;
- BYTE * gcPtrs = cpObjOp->gtGcPtrs;
- unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
+ /* Now we can update the register pointer information */
- // If we have GC pointers then the GTF_BLK_HASGCPTR flags must be set
- if (gcPtrCount > 0)
- assert((tree->gtFlags & GTF_BLK_HASGCPTR) != 0);
+// genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
- GenTreePtr treeFirst, treeSecond;
- regNumber regFirst, regSecond;
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return;
- // Check what order the object-ptrs have to be evaluated in ?
+ case GT_CAST:
- if (op1->gtFlags & GTF_REVERSE_OPS)
- {
- treeFirst = srcObj;
- treeSecond = dstObj;
-#if CPU_USES_BLOCK_MOVE
- regFirst = REG_ESI;
- regSecond = REG_EDI;
-#else
- regFirst = REG_ARG_1;
- regSecond = REG_ARG_0;
+ genCodeForNumericCast(tree, destReg, bestReg);
+ return;
+
+
+ case GT_JTRUE:
+
+ /* Is this a test of a relational operator? */
+
+ if (op1->OperIsCompare())
+ {
+ /* Generate the conditional jump */
+
+ genCondJump(op1);
+
+ genUpdateLife(tree);
+ return;
+ }
+
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
#endif
- }
- else
- {
- treeFirst = dstObj;
- treeSecond = srcObj;
-#if CPU_USES_BLOCK_MOVE
- regFirst = REG_EDI;
- regSecond = REG_ESI;
+ NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
+ break;
+
+ case GT_SWITCH:
+ genCodeForSwitch(tree);
+ return;
+
+ case GT_RETFILT:
+ noway_assert(tree->gtType == TYP_VOID || op1 != 0);
+ if (op1 == 0) // endfinally
+ {
+ reg = REG_NA;
+
+#ifdef _TARGET_XARCH_
+ /* Return using a pop-jmp sequence. As the "try" block calls
+ the finally with a jmp, this leaves the x86 call-ret stack
+ balanced in the normal flow of path. */
+
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
+#elif defined(_TARGET_ARM_)
+ // Nothing needed for ARM
#else
- regFirst = REG_ARG_0;
- regSecond = REG_ARG_1;
+ NYI("TARGET");
#endif
- }
+ }
+ else // endfilter
+ {
+ genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_INTRET);
+ /* The return value has now been computed */
+ reg = op1->gtRegNum;
- bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
- bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
- emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ /* Return */
+ instGen_Return(0);
+ }
- // Materialize the trees in the order desired
+ genCodeForTree_DONE(tree, reg);
+ return;
-#if CPU_USES_BLOCK_MOVE
- genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+ case GT_RETURN:
- // Grab ECX because it will be trashed by the helper
- //
- regSet.rsGrabReg(RBM_ECX);
+#if INLINE_NDIRECT
- while (blkSize >= TARGET_POINTER_SIZE)
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+#endif
+
+ /* Is there a return value and/or an exit statement? */
+
+ if (op1)
+ {
+ if (op1->gtType == TYP_VOID)
{
- if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
+ //We're returning nothing, just generate the block (shared epilog calls).
+ genCodeForTree(op1, 0);
+ }
+#ifdef _TARGET_ARM_
+ else if (op1->gtType == TYP_STRUCT)
+ {
+ if (op1->gtOper == GT_CALL)
{
- // Note that we can use movsd even if it is a GC pointer being transfered
- // because the value is not cached anywhere. If we did this in two moves,
- // we would have to make certain we passed the appropriate GC info on to
- // the emitter.
- instGen(INS_movsp);
+ // We have a return call() because we failed to tail call.
+ // In any case, just generate the call and be done.
+ assert(compiler->IsHfa(op1));
+ genCodeForCall(op1, true);
+ genMarkTreeInReg(op1, REG_FLOATRET);
}
else
{
- // This helper will act like a MOVSD
- // -- inputs EDI and ESI are byrefs
- // -- including incrementing of ESI and EDI by 4
- // -- helper will trash ECX
- //
- regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
- regSet.rsLockUsedReg(argRegs);
- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
- 0, // argSize
- EA_PTRSIZE); // retSize
- regSet.rsUnlockUsedReg(argRegs);
+ assert(op1->gtOper == GT_LCL_VAR);
+ assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
+ genLoadIntoFltRetRegs(op1);
}
-
- blkSize -= TARGET_POINTER_SIZE;
}
+ else if (op1->TypeGet() == TYP_FLOAT)
+ {
+ // This can only occur when we are returning a non-HFA struct
+ // that is composed of a single float field and we performed
+ // struct promotion and enregistered the float field.
+ //
+ genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
+ }
+#endif // _TARGET_ARM_
+ else
+ {
+ //we can now go through this code for compiler->genReturnBB. I've regularized all the code.
- // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
+ //noway_assert(compiler->compCurBB != compiler->genReturnBB);
+
+ noway_assert(op1->gtType != TYP_VOID);
- regTracker.rsTrackRegTrash(REG_EDI);
- regTracker.rsTrackRegTrash(REG_ESI);
- regTracker.rsTrackRegTrash(REG_ECX);
+ /* Generate the return value into the return register */
- gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
+ genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
- /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
- it is a emitNoGChelper. However, we have to let the emitter know that
- the GC liveness has changed. We do this by creating a new label.
- */
+ /* The result must now be in the return register */
- noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_INTRET);
+ }
- genDefineTempLabel(&dummyBB);
+ /* The return value has now been computed */
-#else // !CPU_USES_BLOCK_MOVE
+ reg = op1->gtRegNum;
-#ifndef _TARGET_ARM_
- // Currently only the ARM implementation is provided
-#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
-#endif
+ genCodeForTree_DONE(tree, reg);
- bool helperUsed;
- regNumber regDst;
- regNumber regSrc;
- regNumber regTemp;
+ }
- if ((gcPtrCount > 0) && !dstIsOnStack)
- {
- genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
- genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+ //The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ //the GT_RETURN tree.
+#ifdef PROFILING_SUPPORTED
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
- /* The helper is a Asm-routine that will trash R2,R3 and LR */
- {
- /* Spill any callee-saved registers which are being used */
- regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
+ BasicBlock * esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+ return;
- if (spillRegs)
- {
- regSet.rsSpillRegs(spillRegs);
- }
- }
+ case GT_COMMA:
- // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
- // We will also use it as the temp register for our load/store sequences
- //
- assert(REG_R2 == REG_TMP_1);
- regTemp = regSet.rsGrabReg(RBM_R2);
- helperUsed = true;
- }
- else
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (tree->gtType == TYP_VOID)
{
- genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
- genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
- genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
-
- // Grab any temp register to use for our load/store sequences
- //
- regTemp = regSet.rsGrabReg(RBM_ALLINT);
- helperUsed = false;
+ genEvalSideEffects(op2);
+ genUpdateLife (op2);
+ genEvalSideEffects(op1);
+ genUpdateLife(tree);
+ return;
}
- assert(dstObj->gtFlags & GTF_REG_VAL);
- assert(srcObj->gtFlags & GTF_REG_VAL);
-
- regDst = dstObj->gtRegNum;
- regSrc = srcObj->gtRegNum;
- assert(regDst != regTemp);
- assert(regSrc != regTemp);
-
- instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
- instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
-
- size_t offset = 0;
- while (blkSize >= TARGET_POINTER_SIZE)
- {
- CorInfoGCType gcType;
- CorInfoGCType gcTypeNext = TYPE_GC_NONE;
- var_types type = TYP_I_IMPL;
+ // Generate op2
+ genCodeForTree(op2, needReg);
+ genUpdateLife(op2);
-#if FEATURE_WRITE_BARRIER
- gcType = (CorInfoGCType)(*gcPtrs++);
- if (blkSize > TARGET_POINTER_SIZE)
- gcTypeNext = (CorInfoGCType)(*gcPtrs);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
- if (gcType == TYPE_GC_REF)
- type = TYP_REF;
- else if (gcType == TYPE_GC_BYREF)
- type = TYP_BYREF;
+ regSet.rsMarkRegUsed(op2);
- if (helperUsed)
- {
- assert(regDst == REG_ARG_0);
- assert(regSrc == REG_ARG_1);
- assert(regTemp == REG_R2);
- }
-#else
- gcType = TYPE_GC_NONE;
-#endif // FEATURE_WRITE_BARRIER
+ // Do side effects of op1
+ genEvalSideEffects(op1);
- blkSize -= TARGET_POINTER_SIZE;
+ // Recover op2 if spilled
+ genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
- emitAttr opSize = emitTypeSize(type);
+ regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
- if (!helperUsed || (gcType == TYPE_GC_NONE))
- {
- getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
- getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
- offset += TARGET_POINTER_SIZE;
+ // set gc info if we need so
+ gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
- if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) ||
- ((offset >= 128) && (blkSize > 0)))
- {
- getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
- getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
- offset = 0;
- }
- }
- else
- {
- assert(offset == 0);
+ genUpdateLife(tree);
+ genCodeForTree_DONE(tree, op2->gtRegNum);
- // The helper will act like this:
- // -- inputs R0 and R1 are byrefs
- // -- helper will perform copy from *R1 into *R0
- // -- helper will perform post increment of R0 and R1 by 4
- // -- helper will trash R2
- // -- helper will trash R3
- // -- calling the helper implicitly trashes LR
- //
- assert(helperUsed);
- regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
- regSet.rsLockUsedReg(argRegs);
- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
- 0, // argSize
- EA_PTRSIZE); // retSize
+ return;
+ }
+ else
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
- regSet.rsUnlockUsedReg(argRegs);
- regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
- }
- }
+ /* Generate side effects of the first operand */
- regTracker.rsTrackRegTrash(regDst);
- regTracker.rsTrackRegTrash(regSrc);
- regTracker.rsTrackRegTrash(regTemp);
+ genEvalSideEffects(op1);
+ genUpdateLife (op1);
- gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
+ /* Is the value of the second operand used? */
- /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
- it is a emitNoGChelper. However, we have to let the emitter know that
- the GC liveness has changed. We do this by creating a new label.
- */
+ if (tree->gtType == TYP_VOID)
+ {
+ /* The right operand produces no result. The morpher is
+ responsible for resetting the type of GT_COMMA nodes
+ to TYP_VOID if op2 isn't meant to yield a result. */
- noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
+ genEvalSideEffects(op2);
+ genUpdateLife(tree);
+ return;
+ }
- genDefineTempLabel(&dummyBB);
+ /* Generate the second operand, i.e. the 'real' value */
-#endif // !CPU_USES_BLOCK_MOVE
+ genCodeForTree(op2, needReg);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
- assert(blkSize == 0);
+ /* The result of 'op2' is also the final result */
- genReleaseReg(dstObj);
- genReleaseReg(srcObj);
+ reg = op2->gtRegNum;
- reg = REG_NA;
+ /* Remember whether we set the flags */
- genCodeForTree_DONE(tree, reg);
+ tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
-#ifdef _TARGET_ARM_
- if (tree->AsBlkOp()->IsVolatile())
- {
- // Emit a memory barrier instruction after the CopyBlk
- instGen_MemoryBarrier();
- }
-#endif
+ genCodeForTree_DONE(tree, reg);
+ return;
}
- return;
-
- case GT_COPYBLK:
- case GT_INITBLK:
- noway_assert(oper == GT_COPYBLK || oper == GT_INITBLK);
- noway_assert(op1->IsList());
+ case GT_BOX:
+ genCodeForTree(op1, needReg);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
-#ifdef _TARGET_ARM_
- if (tree->AsBlkOp()->IsVolatile())
- {
- // Emit a memory barrier instruction before the InitBlk/CopyBlk
- instGen_MemoryBarrier();
- }
-#endif
- {
- GenTreePtr destPtr, srcPtrOrVal;
- destPtr = op1->gtOp.gtOp1;
- srcPtrOrVal = op1->gtOp.gtOp2;
- noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
- noway_assert((oper == GT_COPYBLK &&
- (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet())))
- ||
- (oper == GT_INITBLK &&
- varTypeIsIntegral(srcPtrOrVal->TypeGet())));
+ /* The result of 'op1' is also the final result */
- noway_assert(op1 && op1->IsList());
- noway_assert(destPtr && srcPtrOrVal);
+ reg = op1->gtRegNum;
-#if CPU_USES_BLOCK_MOVE
- regs = (oper == GT_INITBLK) ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
+ /* Remember whether we set the flags */
- /* Some special code for block moves/inits for constant sizes */
+ tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
- //
- // Is this a fixed size COPYBLK?
- // or a fixed size INITBLK with a constant init value?
- //
- if ((op2->IsCnsIntOrI()) &&
- ((oper == GT_COPYBLK) || (srcPtrOrVal->IsCnsIntOrI())))
- {
- size_t length = (size_t)op2->gtIntCon.gtIconVal;
- size_t initVal = 0;
- instruction ins_P, ins_PR, ins_B;
+ genCodeForTree_DONE(tree, reg);
+ return;
- if (oper == GT_INITBLK)
- {
- ins_P = INS_stosp;
- ins_PR = INS_r_stosp;
- ins_B = INS_stosb;
+ case GT_QMARK:
- /* Properly extend the init constant from a U1 to a U4 */
- initVal = 0xFF & ((unsigned)op1->gtOp.gtOp2->gtIntCon.gtIconVal);
+ genCodeForQmark(tree, destReg, bestReg);
+ return;
- /* If it is a non-zero value we have to replicate */
- /* the byte value four times to form the DWORD */
- /* Then we change this new value into the tree-node */
+ case GT_NOP:
- if (initVal)
- {
- initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
-#ifdef _TARGET_64BIT_
- if (length > 4)
- {
- initVal = initVal | (initVal << 32);
- op1->gtOp.gtOp2->gtType = TYP_LONG;
- }
- else
- {
- op1->gtOp.gtOp2->gtType = TYP_INT;
- }
-#endif // _TARGET_64BIT_
- }
- op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
- }
- else
- {
- ins_P = INS_movsp;
- ins_PR = INS_r_movsp;
- ins_B = INS_movsb;
- }
+#if OPT_BOOL_OPS
+ if (op1 == NULL)
+ return;
+#endif
- // Determine if we will be using SSE2
- unsigned movqLenMin = 8;
- unsigned movqLenMax = 24;
+ /* Generate the operand into some register */
- bool bWillUseSSE2 = false;
- bool bWillUseOnlySSE2 = false;
- bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
+ genCodeForTree(op1, needReg);
-#ifdef _TARGET_64BIT_
+ /* The result is the same as the operand */
- // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
- // there is no point in wasting space on the bigger instructions
+ reg = op1->gtRegNum;
-#else // !_TARGET_64BIT_
+ genCodeForTree_DONE(tree, reg);
+ return;
- if (compiler->opts.compCanUseSSE2)
- {
- unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
+ case GT_INTRINSIC:
- /* Adjust for BB weight */
- if (curBBweight == BB_ZERO_WEIGHT)
- {
- // Don't bother with this optimization in
- // rarely run blocks
- movqLenMax = movqLenMin = 0;
- }
- else if (curBBweight < BB_UNITY_WEIGHT)
- {
- // Be less aggressive when we are inside a conditional
- movqLenMax = 16;
- }
- else if (curBBweight >= (BB_LOOP_WEIGHT*BB_UNITY_WEIGHT) / 2)
- {
- // Be more aggressive when we are inside a loop
- movqLenMax = 48;
- }
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Round:
+ {
+ noway_assert(tree->gtType == TYP_INT);
- if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || (oper == GT_INITBLK))
- {
- // Be more aggressive when optimizing for speed
- // InitBlk uses fewer instructions
- movqLenMax += 16;
- }
+#if FEATURE_STACK_FP_X87
+ genCodeForTreeFlt(op1);
- if (compiler->compCodeOpt() != Compiler::SMALL_CODE &&
- length >= movqLenMin &&
- length <= movqLenMax)
- {
- bWillUseSSE2 = true;
+ /* Store the FP value into the temp */
+ TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
- if ((length % 8) == 0)
- {
- bWillUseOnlySSE2 = true;
- if (oper == GT_INITBLK && (initVal == 0))
- {
- bNeedEvaluateCnst = false;
- noway_assert((op1->gtOp.gtOp2->OperGet() == GT_CNS_INT));
- }
- }
- }
- }
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+ FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
+ inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
-#endif // !_TARGET_64BIT_
+ reg = regSet.rsPickReg(needReg, bestReg);
+ regTracker.rsTrackRegTrash(reg);
- const bool bWillTrashRegSrc = ((oper == GT_COPYBLK) && !bWillUseOnlySSE2);
- /* Evaluate dest and src/val */
+ inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
- if (op1->gtFlags & GTF_REVERSE_OPS)
- {
- if (bNeedEvaluateCnst)
- {
- genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
- }
- genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
- if (bNeedEvaluateCnst)
- {
- genRecoverReg(op1->gtOp.gtOp2, regs, RegSet::KEEP_REG);
- }
- }
- else
- {
- genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
- if (bNeedEvaluateCnst)
- {
- genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
- }
- genRecoverReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::KEEP_REG);
- }
+ compiler->tmpRlsTemp(temp);
+#else
+ genCodeForTreeFloat(tree, needReg, bestReg);
+ return;
+#endif
+ }
+ break;
- bool bTrashedESI = false;
- bool bTrashedEDI = false;
+ default:
+ noway_assert(!"unexpected math intrinsic");
- if (bWillUseSSE2)
- {
- int blkDisp = 0;
- regNumber xmmReg = REG_XMM0;
+ }
- if (oper == GT_INITBLK)
- {
- if (initVal)
- {
- getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
- getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
- }
- else
- {
- getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
- }
- }
+ genCodeForTree_DONE(tree, reg);
+ return;
- JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
- length, (oper == GT_INITBLK) ? "initblk" : "copyblk", compiler->info.compFullName));
+ case GT_LCLHEAP:
- while (length > 7)
- {
- if (oper == GT_INITBLK)
- {
- getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
- }
- else
- {
- getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
- getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
- }
- blkDisp += 8;
- length -= 8;
- }
+ reg = genLclHeap(op1);
+ genCodeForTree_DONE(tree, reg);
+ return;
- if (length > 0)
- {
- noway_assert(bNeedEvaluateCnst);
- noway_assert(!bWillUseOnlySSE2);
+ case GT_COPYOBJ:
+ noway_assert(op1->IsList());
- if (oper == GT_COPYBLK)
- {
- inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
- bTrashedESI = true;
- }
+ /* If the value class doesn't have any fields that are GC refs or
+ the target isn't on the GC-heap, we can merge it with CPBLK.
+ GC fields cannot be copied directly, instead we will
+ need to use a jit-helper for that. */
+ assert(tree->AsCpObj()->gtGcPtrCount > 0);
- inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
- bTrashedEDI = true;
+ {
+ GenTreeCpObj* cpObjOp = tree->AsCpObj();
- if (length >= REGSIZE_BYTES)
- {
- instGen(ins_P);
- length -= REGSIZE_BYTES;
- }
- }
- }
- else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
- {
- /* For small code, we can only use ins_DR to generate fast
- and small code. We also can't use "rep movsb" because
- we may not atomically reading and writing the DWORD */
+#ifdef _TARGET_ARM_
+ if (cpObjOp->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+ GenTreePtr srcObj = cpObjOp->Source();
+ GenTreePtr dstObj = cpObjOp->Dest();
- noway_assert(bNeedEvaluateCnst);
+ noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
- goto USE_DR;
- }
- else if (length <= 4 * REGSIZE_BYTES)
- {
- noway_assert(bNeedEvaluateCnst);
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
+ size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
- while (length >= REGSIZE_BYTES)
- {
- instGen(ins_P);
- length -= REGSIZE_BYTES;
- }
+ // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
+ // The EE currently does not allow this. Let's assert it just to be safe.
+ noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
+#endif
- bTrashedEDI = true;
- if (oper == GT_COPYBLK)
- bTrashedESI = true;
- }
- else
- {
- USE_DR:
- noway_assert(bNeedEvaluateCnst);
+ size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
+ unsigned slots = cpObjOp->gtSlots;
+ BYTE * gcPtrs = cpObjOp->gtGcPtrs;
+ unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
+
+ // If we have GC pointers then the GTF_BLK_HASGCPTR flags must be set
+ if (gcPtrCount > 0)
+ assert((tree->gtFlags & GTF_BLK_HASGCPTR) != 0);
- /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
- genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
+ GenTreePtr treeFirst, treeSecond;
+ regNumber regFirst, regSecond;
+
+ // Check what order the object-ptrs have to be evaluated in ?
- length &= (REGSIZE_BYTES - 1);
+ if (op1->gtFlags & GTF_REVERSE_OPS)
+ {
+ treeFirst = srcObj;
+ treeSecond = dstObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_ESI;
+ regSecond = REG_EDI;
+#else
+ regFirst = REG_ARG_1;
+ regSecond = REG_ARG_0;
+#endif
+ }
+ else
+ {
+ treeFirst = dstObj;
+ treeSecond = srcObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_EDI;
+ regSecond = REG_ESI;
+#else
+ regFirst = REG_ARG_0;
+ regSecond = REG_ARG_1;
+#endif
+ }
- instGen(ins_PR);
+ bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
+ bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- regTracker.rsTrackRegTrash(REG_ECX);
+ // Materialize the trees in the order desired
- bTrashedEDI = true;
- if (oper == GT_COPYBLK)
- bTrashedESI = true;
- }
+#if CPU_USES_BLOCK_MOVE
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
- /* Now take care of the remainder */
+ // Grab ECX because it will be trashed by the helper
+ //
+ regSet.rsGrabReg(RBM_ECX);
-#ifdef _TARGET_64BIT_
- if (length > 4)
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
{
- noway_assert(bNeedEvaluateCnst);
- noway_assert(length < 8);
-
- instGen((oper == GT_INITBLK) ? INS_stosd : INS_movsd);
- length -= 4;
-
- bTrashedEDI = true;
- if (oper == GT_COPYBLK)
- bTrashedESI = true;
+ // Note that we can use movsd even if it is a GC pointer being transfered
+ // because the value is not cached anywhere. If we did this in two moves,
+ // we would have to make certain we passed the appropriate GC info on to
+ // the emitter.
+ instGen(INS_movsp);
}
-
-#endif // _TARGET_64BIT_
-
- if (length)
+ else
{
- noway_assert(bNeedEvaluateCnst);
-
- while (length--)
- {
- instGen(ins_B);
- }
-
- bTrashedEDI = true;
- if (oper == GT_COPYBLK)
- bTrashedESI = true;
+ // This helper will act like a MOVSD
+ // -- inputs EDI and ESI are byrefs
+ // -- including incrementing of ESI and EDI by 4
+ // -- helper will trash ECX
+ //
+ regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
+ regSet.rsLockUsedReg(argRegs);
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+ regSet.rsUnlockUsedReg(argRegs);
}
- noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
- if (bTrashedEDI)
- regTracker.rsTrackRegTrash(REG_EDI);
- if (bTrashedESI)
- regTracker.rsTrackRegTrash(REG_ESI);
- // else No need to trash EAX as it wasnt destroyed by the "rep stos"
-
- genReleaseReg(op1->gtOp.gtOp1);
- if (bNeedEvaluateCnst) genReleaseReg(op1->gtOp.gtOp2);
-
+ blkSize -= TARGET_POINTER_SIZE;
}
- else
- {
- //
- // This a variable-sized COPYBLK/INITBLK,
- // or a fixed size INITBLK with a variable init value,
- //
-
- // What order should the Dest, Val/Src, and Size be calculated
- compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX,
- opsPtr, regsPtr); // OUT arguments
-
- noway_assert(((oper == GT_INITBLK) && (regs == RBM_EAX)) || ((oper == GT_COPYBLK) && (regs == RBM_ESI)));
- genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
- genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
- genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
-
- genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
- genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
-
- noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
- (op1->gtOp.gtOp1->gtRegNum == REG_EDI));
-
- noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
- (genRegMask(op1->gtOp.gtOp2->gtRegNum) == regs));
+ // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
- noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size
- (op2->gtRegNum == REG_ECX));
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ESI);
+ regTracker.rsTrackRegTrash(REG_ECX);
- if (oper == GT_INITBLK)
- instGen(INS_r_stosb);
- else
- instGen(INS_r_movsb);
+ gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
- regTracker.rsTrackRegTrash(REG_EDI);
- regTracker.rsTrackRegTrash(REG_ECX);
+ /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
+ it is a emitNoGChelper. However, we have to let the emitter know that
+ the GC liveness has changed. We do this by creating a new label.
+ */
- if (oper == GT_COPYBLK)
- regTracker.rsTrackRegTrash(REG_ESI);
- // else No need to trash EAX as it wasnt destroyed by the "rep stos"
+ noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
- genReleaseReg(opsPtr[0]);
- genReleaseReg(opsPtr[1]);
- genReleaseReg(opsPtr[2]);
- }
+ genDefineTempLabel(&dummyBB);
-#else // !CPU_USES_BLOCK_MOVE
+#else // !CPU_USES_BLOCK_MOVE
#ifndef _TARGET_ARM_
// Currently only the ARM implementation is provided
-#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
+#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
#endif
- //
- // Is this a fixed size COPYBLK?
- // or a fixed size INITBLK with a constant init value?
- //
- if ((op2->OperGet() == GT_CNS_INT) &&
- ((oper == GT_COPYBLK) || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
+
+ bool helperUsed;
+ regNumber regDst;
+ regNumber regSrc;
+ regNumber regTemp;
+
+ if ((gcPtrCount > 0) && !dstIsOnStack)
{
- GenTreePtr dstOp = op1->gtOp.gtOp1;
- GenTreePtr srcOp = op1->gtOp.gtOp2;
- unsigned length = (unsigned)op2->gtIntCon.gtIconVal;
- unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
- unsigned initVal = 0;
- bool useLoop = false;
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
- if (oper == GT_INITBLK)
+ /* The helper is an Asm-routine that will trash R2, R3 and LR */
{
- /* Properly extend the init constant from a U1 to a U4 */
- initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
-
- /* If it is a non-zero value we have to replicate */
- /* the byte value four times to form the DWORD */
- /* Then we store this new value into the tree-node */
+ /* Spill any callee-saved registers which are being used */
+ regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
- if (initVal != 0)
+ if (spillRegs)
{
- initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
- op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
+ regSet.rsSpillRegs(spillRegs);
}
}
- // Will we be using a loop to implement this INITBLK/COPYBLK?
- if (((oper == GT_COPYBLK) && (fullStoreCount >= 8)) ||
- ((oper == GT_INITBLK) && (fullStoreCount >= 16)))
- {
- useLoop = true;
- }
-
- regMaskTP usedRegs;
- regNumber regDst;
- regNumber regSrc;
- regNumber regTemp;
+ // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
+ // We will also use it as the temp register for our load/store sequences
+ //
+ assert(REG_R2 == REG_TMP_1);
+ regTemp = regSet.rsGrabReg(RBM_R2);
+ helperUsed = true;
+ }
+ else
+ {
+ genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
+ genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
+ genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
- /* Evaluate dest and src/val */
+ // Grab any temp register to use for our load/store sequences
+ //
+ regTemp = regSet.rsGrabReg(RBM_ALLINT);
+ helperUsed = false;
+ }
+ assert(dstObj->gtFlags & GTF_REG_VAL);
+ assert(srcObj->gtFlags & GTF_REG_VAL);
- if (op1->gtFlags & GTF_REVERSE_OPS)
- {
- genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(srcOp->gtFlags & GTF_REG_VAL);
+ regDst = dstObj->gtRegNum;
+ regSrc = srcObj->gtRegNum;
- genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(dstOp->gtFlags & GTF_REG_VAL);
- regDst = dstOp->gtRegNum;
+ assert(regDst != regTemp);
+ assert(regSrc != regTemp);
- genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
- regSrc = srcOp->gtRegNum;
- }
- else
- {
- genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(dstOp->gtFlags & GTF_REG_VAL);
+ instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
+ instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
- genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
- assert(srcOp->gtFlags & GTF_REG_VAL);
- regSrc = srcOp->gtRegNum;
+ size_t offset = 0;
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ CorInfoGCType gcType;
+ CorInfoGCType gcTypeNext = TYPE_GC_NONE;
+ var_types type = TYP_I_IMPL;
- genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
- regDst = dstOp->gtRegNum;
- }
- assert(dstOp->gtFlags & GTF_REG_VAL);
- assert(srcOp->gtFlags & GTF_REG_VAL);
+#if FEATURE_WRITE_BARRIER
+ gcType = (CorInfoGCType)(*gcPtrs++);
+ if (blkSize > TARGET_POINTER_SIZE)
+ gcTypeNext = (CorInfoGCType)(*gcPtrs);
- regDst = dstOp->gtRegNum;
- regSrc = srcOp->gtRegNum;
- usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
- bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
- emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- emitAttr srcType;
+ if (gcType == TYPE_GC_REF)
+ type = TYP_REF;
+ else if (gcType == TYPE_GC_BYREF)
+ type = TYP_BYREF;
- if (oper == GT_COPYBLK)
- {
- // Prefer a low register,but avoid one of the ones we've already grabbed
- regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- usedRegs |= genRegMask(regTemp);
- bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
- srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
- }
- else
+ if (helperUsed)
{
- regTemp = REG_STK;
- srcType = EA_PTRSIZE;
+ assert(regDst == REG_ARG_0);
+ assert(regSrc == REG_ARG_1);
+ assert(regTemp == REG_R2);
}
+#else
+ gcType = TYPE_GC_NONE;
+#endif // FEATURE_WRITE_BARRIER
- instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
- instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
-
- int finalOffset;
+ blkSize -= TARGET_POINTER_SIZE;
- // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
- if (!useLoop)
- {
- for (unsigned i = 0; i < fullStoreCount; i++)
- {
- if (oper == GT_COPYBLK)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
- }
- }
+ emitAttr opSize = emitTypeSize(type);
- finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
- length -= finalOffset;
- }
- else // We will use a loop to implement this INITBLK/COPYBLK
+ if (!helperUsed || (gcType == TYPE_GC_NONE))
{
- unsigned pairStoreLoopCount = fullStoreCount / 2;
-
- // We need a second temp register for CopyBlk
- regNumber regTemp2 = REG_STK;
- if (oper == GT_COPYBLK)
- {
- // Prefer a low register, but avoid one of the ones we've already grabbed
- regTemp2 = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- usedRegs |= genRegMask(regTemp2);
- }
-
- // Pick and initialize the loop counter register
- regNumber regLoopIndex;
- regLoopIndex = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
- genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
-
- // Create and define the Basic Block for the loop top
- BasicBlock * loopTopBlock = genCreateTempLabel();
- genDefineTempLabel(loopTopBlock);
-
- // The loop body
- if (oper == GT_COPYBLK)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
- getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
- regTracker.rsTrackRegTrash(regSrc);
- regTracker.rsTrackRegTrash(regTemp);
- regTracker.rsTrackRegTrash(regTemp2);
- }
- else // GT_INITBLK
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
- }
-
- getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
- regTracker.rsTrackRegTrash(regDst);
- getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
- emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
- inst_JMP(jmpGTS, loopTopBlock);
-
- regTracker.rsTrackRegIntCns(regLoopIndex, 0);
-
- length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
+ getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
+ getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
+ offset += TARGET_POINTER_SIZE;
- if (length & TARGET_POINTER_SIZE)
- {
- if (oper == GT_COPYBLK)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
- }
- finalOffset = TARGET_POINTER_SIZE;
- length -= TARGET_POINTER_SIZE;
- }
- else
+ if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) ||
+ ((offset >= 128) && (blkSize > 0)))
{
- finalOffset = 0;
+ getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
+ getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
+ offset = 0;
}
}
-
- if (length & sizeof(short))
+ else
{
- loadIns = ins_Load(TYP_USHORT); // INS_ldrh
- storeIns = ins_Store(TYP_USHORT); // INS_strh
-
- if (oper == GT_COPYBLK)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
- getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
- }
- length -= sizeof(short);
- finalOffset += sizeof(short);
- }
+ assert(offset == 0);
- if (length & sizeof(char))
- {
- loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
- storeIns = ins_Store(TYP_UBYTE); // INS_strb
+ // The helper will act like this:
+ // -- inputs R0 and R1 are byrefs
+ // -- helper will perform copy from *R1 into *R0
+ // -- helper will perform post increment of R0 and R1 by 4
+ // -- helper will trash R2
+ // -- helper will trash R3
+ // -- calling the helper implicitly trashes LR
+ //
+ assert(helperUsed);
+ regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
+ regSet.rsLockUsedReg(argRegs);
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
- if (oper == GT_COPYBLK)
- {
- getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
- getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
- gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
- regTracker.rsTrackRegTrash(regTemp);
- }
- else
- {
- getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
- }
- length -= sizeof(char);
+ regSet.rsUnlockUsedReg(argRegs);
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
}
- assert(length == 0);
-
- genReleaseReg(dstOp);
- genReleaseReg(srcOp);
}
- else
- {
- //
- // This a variable-sized COPYBLK/INITBLK,
- // or a fixed size INITBLK with a variable init value,
- //
-
- // What order should the Dest, Val/Src, and Size be calculated
- compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2,
- opsPtr, regsPtr); // OUT arguments
+ regTracker.rsTrackRegTrash(regDst);
+ regTracker.rsTrackRegTrash(regSrc);
+ regTracker.rsTrackRegTrash(regTemp);
- genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
- genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
- genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
- genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
- genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+ /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
+ it is an emitNoGChelper. However, we have to let the emitter know that
+ the GC liveness has changed. We do this by creating a new label.
+ */
- noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
- (op1->gtOp.gtOp1->gtRegNum == REG_ARG_0));
+ noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
- noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
- (op1->gtOp.gtOp2->gtRegNum == REG_ARG_1));
+ genDefineTempLabel(&dummyBB);
- noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size
- (op2->gtRegNum == REG_ARG_2));
+#endif // !CPU_USES_BLOCK_MOVE
- regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+ assert(blkSize == 0);
- genEmitHelperCall(oper == GT_COPYBLK ? CORINFO_HELP_MEMCPY
- /* GT_INITBLK */ : CORINFO_HELP_MEMSET,
- 0, EA_UNKNOWN);
+ genReleaseReg(dstObj);
+ genReleaseReg(srcObj);
- regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
+ reg = REG_NA;
- regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
- genReleaseReg(opsPtr[0]);
- genReleaseReg(opsPtr[1]);
- genReleaseReg(opsPtr[2]);
- }
+ genCodeForTree_DONE(tree, reg);
- if ((oper == GT_COPYBLK) && tree->AsBlkOp()->IsVolatile())
+#ifdef _TARGET_ARM_
+ if (tree->AsBlkOp()->IsVolatile())
{
// Emit a memory barrier instruction after the CopyBlk
instGen_MemoryBarrier();
}
-#endif // !CPU_USES_BLOCK_MOVE
-
- reg = REG_NA;
+#endif
}
+ return;
+
+ case GT_COPYBLK:
+ case GT_INITBLK:
- genCodeForTree_DONE(tree, reg);
+ genCodeForBlkOp(tree, destReg);
+ genCodeForTree_DONE(tree, REG_NA);
return;
case GT_EQ: