From: Carol Eidt Date: Fri, 22 Apr 2016 18:40:46 +0000 (-0700) Subject: Block Ops Refactor X-Git-Tag: accepted/tizen/base/20180629.140029~4918^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3f6626d1297a3ae02a51830ff2e017462bb79dd5;p=platform%2Fupstream%2Fcoreclr.git Block Ops Refactor Refactor the legacy code dealing with block ops to minimize diffs when they are replaced with assignments. Also a couple of comment edits and a small cleanup in rationalizer. --- diff --git a/src/jit/codegenclassic.h b/src/jit/codegenclassic.h index 74a3273..e23c2d3 100644 --- a/src/jit/codegenclassic.h +++ b/src/jit/codegenclassic.h @@ -286,6 +286,9 @@ regMaskTP destReg, regMaskTP bestReg); + void genCodeForBlkOp (GenTreePtr tree, + regMaskTP destReg); + void genCodeForTreeSmpOp (GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE); diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index 5a0198f..f2a704a 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -8853,1511 +8853,1520 @@ void CodeGen::genCodeForRelop(GenTreePtr tree, genCodeForTree_DONE(tree, reg); } - -BasicBlock dummyBB; - -#ifdef _PREFAST_ -#pragma warning(push) -#pragma warning(disable:21000) // Suppress PREFast warning about overly large function -#endif -void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, - regMaskTP destReg, - regMaskTP bestReg) +void CodeGen::genCodeForBlkOp(GenTreePtr tree, + regMaskTP destReg) { - const genTreeOps oper = tree->OperGet(); - const var_types treeType = tree->TypeGet(); + genTreeOps oper = tree->OperGet(); GenTreePtr op1 = tree->gtOp.gtOp1; GenTreePtr op2 = tree->gtGetOp2(); - regNumber reg = DUMMY_INIT(REG_CORRUPT); - regMaskTP regs = regSet.rsMaskUsed; regMaskTP needReg = destReg; - insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; - emitAttr size; - instruction ins; - regMaskTP addrReg; + regMaskTP regs = regSet.rsMaskUsed; GenTreePtr opsPtr[3]; regMaskTP regsPtr[3]; -#ifdef DEBUG - addrReg = 0xDEADCAFE; + noway_assert(oper == GT_COPYBLK || oper == GT_INITBLK); + noway_assert(op1->IsList()); + +#ifdef _TARGET_ARM_ + if (tree->AsBlkOp()->IsVolatile()) + { + // Emit a memory barrier instruction before the InitBlk/CopyBlk + instGen_MemoryBarrier(); + } #endif + { + GenTreePtr destPtr, srcPtrOrVal; + destPtr = op1->gtOp.gtOp1; + srcPtrOrVal = op1->gtOp.gtOp2; + noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet())); + noway_assert((oper == GT_COPYBLK && + (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) + || + (oper == GT_INITBLK && + varTypeIsIntegral(srcPtrOrVal->TypeGet()))); - noway_assert(tree->OperKind() & GTK_SMPOP); + noway_assert(op1 && op1->IsList()); + noway_assert(destPtr && srcPtrOrVal); - switch (oper) - { - case GT_ASG: - genCodeForTreeSmpOpAsg(tree); - return; +#if CPU_USES_BLOCK_MOVE + regs = (oper == GT_INITBLK) ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src - case GT_ASG_LSH: - case GT_ASG_RSH: - case GT_ASG_RSZ: - genCodeForAsgShift(tree, destReg, bestReg); - return; + /* Some special code for block moves/inits for constant sizes */ - case GT_ASG_AND: - case GT_ASG_OR : - case GT_ASG_XOR: - case GT_ASG_ADD: - case GT_ASG_SUB: - genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg); - return; + // + // Is this a fixed size COPYBLK? + // or a fixed size INITBLK with a constant init value? 
+ // + if ((op2->IsCnsIntOrI()) && + ((oper == GT_COPYBLK) || (srcPtrOrVal->IsCnsIntOrI()))) + { + size_t length = (size_t)op2->gtIntCon.gtIconVal; + size_t initVal = 0; + instruction ins_P, ins_PR, ins_B; - case GT_CHS: - addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true); -#ifdef _TARGET_XARCH_ - // Note that the specialCase here occurs when the treeType specifies a byte sized operation - // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI) - // - bool specialCase; specialCase = false; - if (op1->gtOper == GT_REG_VAR) + if (oper == GT_INITBLK) { - /* Get hold of the target register */ + ins_P = INS_stosp; + ins_PR = INS_r_stosp; + ins_B = INS_stosb; - reg = op1->gtRegVar.gtRegNum; - if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS)) - { - regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + /* Properly extend the init constant from a U1 to a U4 */ + initVal = 0xFF & ((unsigned)op1->gtOp.gtOp2->gtIntCon.gtIconVal); - inst_RV_RV(INS_mov, byteReg, reg); - regTracker.rsTrackRegTrash(byteReg); + /* If it is a non-zero value we have to replicate */ + /* the byte value four times to form the DWORD */ + /* Then we change this new value into the tree-node */ - inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType)); - var_types op1Type = op1->TypeGet(); - instruction wideningIns = ins_Move_Extend(op1Type, true); - inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type)); - regTracker.rsTrackRegTrash(reg); - specialCase = true; + if (initVal) + { + initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24); +#ifdef _TARGET_64BIT_ + if (length > 4) + { + initVal = initVal | (initVal << 32); + op1->gtOp.gtOp2->gtType = TYP_LONG; + } + else + { + op1->gtOp.gtOp2->gtType = TYP_INT; + } +#endif // _TARGET_64BIT_ } - } - - if (!specialCase) - { - inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType)); - } -#else // not _TARGET_XARCH_ - if (op1->gtFlags & GTF_REG_VAL) - { - inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags); + op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal; } else { - // Fix 388382 ARM JitStress WP7 - var_types op1Type = op1->TypeGet(); - regNumber reg = regSet.rsPickFreeReg(); - inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type)); - regTracker.rsTrackRegTrash(reg); - inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags); - inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type)); + ins_P = INS_movsp; + ins_PR = INS_r_movsp; + ins_B = INS_movsb; } -#endif - if (op1->gtFlags & GTF_REG_VAL) - regTracker.rsTrackRegTrash(op1->gtRegNum); - genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); - - genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false); - return; - - case GT_AND: - case GT_OR : - case GT_XOR: - case GT_ADD: - case GT_SUB: - case GT_MUL: - genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg); - return; - case GT_UMOD: - genCodeForUnsignedMod(tree, destReg, bestReg); - return; + // Determine if we will be using SSE2 + unsigned movqLenMin = 8; + unsigned movqLenMax = 24; - case GT_MOD: - genCodeForSignedMod(tree, destReg, bestReg); - return; + bool bWillUseSSE2 = false; + bool bWillUseOnlySSE2 = false; + bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there. 
- case GT_UDIV: - genCodeForUnsignedDiv(tree, destReg, bestReg); - return; +#ifdef _TARGET_64BIT_ - case GT_DIV: - genCodeForSignedDiv(tree, destReg, bestReg); - return; + // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8 + // there is no point in wasting space on the bigger instructions - case GT_LSH: - case GT_RSH: - case GT_RSZ: - genCodeForShift(tree, destReg, bestReg); - return; +#else // !_TARGET_64BIT_ - case GT_NEG: - case GT_NOT: + if (compiler->opts.compCanUseSSE2) + { + unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler); - /* Generate the operand into some register */ + /* Adjust for BB weight */ + if (curBBweight == BB_ZERO_WEIGHT) + { + // Don't bother with this optimization in + // rarely run blocks + movqLenMax = movqLenMin = 0; + } + else if (curBBweight < BB_UNITY_WEIGHT) + { + // Be less aggressive when we are inside a conditional + movqLenMax = 16; + } + else if (curBBweight >= (BB_LOOP_WEIGHT*BB_UNITY_WEIGHT) / 2) + { + // Be more aggressive when we are inside a loop + movqLenMax = 48; + } - genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG); - noway_assert(op1->gtFlags & GTF_REG_VAL); + if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || (oper == GT_INITBLK)) + { + // Be more aggressive when optimizing for speed + // InitBlk uses fewer instructions + movqLenMax += 16; + } - reg = op1->gtRegNum; + if (compiler->compCodeOpt() != Compiler::SMALL_CODE && + length >= movqLenMin && + length <= movqLenMax) + { + bWillUseSSE2 = true; - /* Negate/reverse the value in the register */ + if ((length % 8) == 0) + { + bWillUseOnlySSE2 = true; + if (oper == GT_INITBLK && (initVal == 0)) + { + bNeedEvaluateCnst = false; + noway_assert((op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)); + } + } + } + } - inst_RV((oper == GT_NEG) ? INS_NEG - : INS_NOT, reg, treeType); +#endif // !_TARGET_64BIT_ - /* The register is now trashed */ + const bool bWillTrashRegSrc = ((oper == GT_COPYBLK) && !bWillUseOnlySSE2); + /* Evaluate dest and src/val */ - regTracker.rsTrackRegTrash(reg); + if (op1->gtFlags & GTF_REVERSE_OPS) + { + if (bNeedEvaluateCnst) + { + genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc); + } + genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2); + if (bNeedEvaluateCnst) + { + genRecoverReg(op1->gtOp.gtOp2, regs, RegSet::KEEP_REG); + } + } + else + { + genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2); + if (bNeedEvaluateCnst) + { + genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc); + } + genRecoverReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::KEEP_REG); + } - genCodeForTree_DONE(tree, reg); - return; + bool bTrashedESI = false; + bool bTrashedEDI = false; - case GT_IND: - case GT_NULLCHECK: // At this point, explicit null checks are just like inds... + if (bWillUseSSE2) + { + int blkDisp = 0; + regNumber xmmReg = REG_XMM0; - /* Make sure the operand is addressable */ + if (oper == GT_INITBLK) + { + if (initVal) + { + getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX); + getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg); + } + else + { + getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg); + } + } - addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true); + JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n", + length, (oper == GT_INITBLK) ? 
"initblk" : "copyblk", compiler->info.compFullName)); - genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); + while (length > 7) + { + if (oper == GT_INITBLK) + { + getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp); + } + else + { + getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp); + getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp); + } + blkDisp += 8; + length -= 8; + } - /* Figure out the size of the value being loaded */ + if (length > 0) + { + noway_assert(bNeedEvaluateCnst); + noway_assert(!bWillUseOnlySSE2); - size = EA_ATTR(genTypeSize(tree->gtType)); + if (oper == GT_COPYBLK) + { + inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet())); + bTrashedESI = true; + } - /* Pick a register for the value */ + inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet())); + bTrashedEDI = true; - if (needReg == RBM_ALLINT && bestReg == 0) + if (length >= REGSIZE_BYTES) + { + instGen(ins_P); + length -= REGSIZE_BYTES; + } + } + } + else if (compiler->compCodeOpt() == Compiler::SMALL_CODE) { - /* Absent a better suggestion, pick a useless register */ + /* For small code, we can only use ins_DR to generate fast + and small code. We also can't use "rep movsb" because + we may not atomically reading and writing the DWORD */ - bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs()); + noway_assert(bNeedEvaluateCnst); + + goto USE_DR; } + else if (length <= 4 * REGSIZE_BYTES) + { + noway_assert(bNeedEvaluateCnst); - reg = regSet.rsPickReg(needReg, bestReg); + while (length >= REGSIZE_BYTES) + { + instGen(ins_P); + length -= REGSIZE_BYTES; + } - if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL)) - { - noway_assert(size == EA_PTRSIZE); - getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL), - EA_PTRSIZE, - reg, - FLD_GLOBAL_FS, - (int)op1->gtIntCon.gtIconVal); + bTrashedEDI = true; + if (oper == GT_COPYBLK) + bTrashedESI = true; } else { - /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */ + USE_DR: + noway_assert(bNeedEvaluateCnst); - inst_mov_RV_ST(reg, tree); - } + /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */ + genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL); -#ifdef _TARGET_ARM_ - if (tree->gtFlags & GTF_IND_VOLATILE) - { - // Emit a memory barrier instruction after the load - instGen_MemoryBarrier(); - } -#endif + length &= (REGSIZE_BYTES - 1); - /* Note the new contents of the register we used */ + instGen(ins_PR); - regTracker.rsTrackRegTrash(reg); + regTracker.rsTrackRegTrash(REG_ECX); - /* Update the live set of register variables */ + bTrashedEDI = true; + if (oper == GT_COPYBLK) + bTrashedESI = true; + } -#ifdef DEBUG - if (compiler->opts.varNames) genUpdateLife(tree); -#endif + /* Now take care of the remainder */ - /* Now we can update the register pointer information */ +#ifdef _TARGET_64BIT_ + if (length > 4) + { + noway_assert(bNeedEvaluateCnst); + noway_assert(length < 8); -// genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); - gcInfo.gcMarkRegPtrVal(reg, treeType); + instGen((oper == GT_INITBLK) ? 
INS_stosd : INS_movsd);
+            length -= 4;

-    genCodeForTree_DONE_LIFE(tree, reg);
-    return;
+            bTrashedEDI = true;
+            if (oper == GT_COPYBLK)
+                bTrashedESI = true;
+        }

-    case GT_CAST:
+#endif // _TARGET_64BIT_

-        genCodeForNumericCast(tree, destReg, bestReg);
-        return;
+        if (length)
+        {
+            noway_assert(bNeedEvaluateCnst);
+            while (length--)
+            {
+                instGen(ins_B);
+            }

-    case GT_JTRUE:
+            bTrashedEDI = true;
+            if (oper == GT_COPYBLK)
+                bTrashedESI = true;
+        }

-        /* Is this a test of a relational operator? */
+        noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
+        if (bTrashedEDI)
+            regTracker.rsTrackRegTrash(REG_EDI);
+        if (bTrashedESI)
+            regTracker.rsTrackRegTrash(REG_ESI);
+        // else No need to trash EAX as it wasn't destroyed by the "rep stos"

-        if (op1->OperIsCompare())
-        {
-            /* Generate the conditional jump */
+        genReleaseReg(op1->gtOp.gtOp1);
+        if (bNeedEvaluateCnst) genReleaseReg(op1->gtOp.gtOp2);

-            genCondJump(op1);
+    }
+    else
+    {
+        //
+        // This is a variable-sized COPYBLK/INITBLK,
+        // or a fixed size INITBLK with a variable init value,
+        //

-            genUpdateLife(tree);
-            return;
-        }
+        // What order should the Dest, Val/Src, and Size be calculated in?

-#ifdef DEBUG
-        compiler->gtDispTree(tree);
-#endif
-        NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
-        break;
+        compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX,
+                                  opsPtr, regsPtr); // OUT arguments

-    case GT_SWITCH:
-        genCodeForSwitch(tree);
-        return;
+        noway_assert(((oper == GT_INITBLK) && (regs == RBM_EAX)) || ((oper == GT_COPYBLK) && (regs == RBM_ESI)));
+        genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
+        genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
+        genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));

-    case GT_RETFILT:
-        noway_assert(tree->gtType == TYP_VOID || op1 != 0);
-        if (op1 == 0)   // endfinally
-        {
-            reg = REG_NA;
+        genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+        genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);

-#ifdef _TARGET_XARCH_
-            /* Return using a pop-jmp sequence. As the "try" block calls
-               the finally with a jmp, this leaves the x86 call-ret stack
-               balanced in the normal flow of path. */
+        noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
+                     (op1->gtOp.gtOp1->gtRegNum == REG_EDI));

-            noway_assert(isFramePointerRequired());
-            inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
-            inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
-#elif defined(_TARGET_ARM_)
-            // Nothing needed for ARM
-#else
-            NYI("TARGET");
+        noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
+                     (genRegMask(op1->gtOp.gtOp2->gtRegNum) == regs));
+
+        noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size
+                     (op2->gtRegNum == REG_ECX));
+
+        if (oper == GT_INITBLK)
+            instGen(INS_r_stosb);
+        else
+            instGen(INS_r_movsb);
+
+        regTracker.rsTrackRegTrash(REG_EDI);
+        regTracker.rsTrackRegTrash(REG_ECX);
+
+        if (oper == GT_COPYBLK)
+            regTracker.rsTrackRegTrash(REG_ESI);
+        // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+        genReleaseReg(opsPtr[0]);
+        genReleaseReg(opsPtr[1]);
+        genReleaseReg(opsPtr[2]);
+    }
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+    // Currently only the ARM implementation is provided
+#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
 #endif
-        }
-        else   // endfilter
+    //
+    // Is this a fixed size COPYBLK?
+    // or a fixed size INITBLK with a constant init value?
+    //
+    if ((op2->OperGet() == GT_CNS_INT) &&
+        ((oper == GT_COPYBLK) || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
+    {
+        GenTreePtr dstOp = op1->gtOp.gtOp1;
+        GenTreePtr srcOp = op1->gtOp.gtOp2;
+        unsigned   length = (unsigned)op2->gtIntCon.gtIconVal;
+        unsigned   fullStoreCount = length / TARGET_POINTER_SIZE;
+        unsigned   initVal = 0;
+        bool       useLoop = false;
+
+        if (oper == GT_INITBLK)
         {
-            genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
-            noway_assert(op1->gtFlags & GTF_REG_VAL);
-            noway_assert(op1->gtRegNum == REG_INTRET);
-            /* The return value has now been computed */
-            reg = op1->gtRegNum;
+            /* Properly extend the init constant from a U1 to a U4 */
+            initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);

-            /* Return */
-            instGen_Return(0);
+            /* If it is a non-zero value we have to replicate */
+            /* the byte value four times to form the DWORD */
+            /* Then we store this new value into the tree-node */
+
+            if (initVal != 0)
+            {
+                initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+                op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal;
+            }
         }

-        genCodeForTree_DONE(tree, reg);
-        return;
+        // Will we be using a loop to implement this INITBLK/COPYBLK?
+        if (((oper == GT_COPYBLK) && (fullStoreCount >= 8)) ||
+            ((oper == GT_INITBLK) && (fullStoreCount >= 16)))
+        {
+            useLoop = true;
+        }

-    case GT_RETURN:
+        regMaskTP usedRegs;
+        regNumber regDst;
+        regNumber regSrc;
+        regNumber regTemp;

-#if INLINE_NDIRECT
+        /* Evaluate dest and src/val */

-        // TODO: this should be done AFTER we called exit mon so that
-        // we are sure that we don't have to keep 'this' alive
+        if (op1->gtFlags & GTF_REVERSE_OPS)
+        {
+            genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+            assert(srcOp->gtFlags & GTF_REG_VAL);

-        if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+            genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+            assert(dstOp->gtFlags & GTF_REG_VAL);
+            regDst = dstOp->gtRegNum;
+
+            genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
+            regSrc = srcOp->gtRegNum;
+        }
+        else
         {
-            /* either it's an "empty" statement or the return statement
-               of a synchronized method
-             */
+            genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+            assert(dstOp->gtFlags & GTF_REG_VAL);

-            genPInvokeMethodEpilog();
+            genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+            assert(srcOp->gtFlags & GTF_REG_VAL);
+            regSrc = srcOp->gtRegNum;
+
+            genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
+            regDst = dstOp->gtRegNum;
         }
+        assert(dstOp->gtFlags & GTF_REG_VAL);
+        assert(srcOp->gtFlags & GTF_REG_VAL);

-#endif
+        regDst = dstOp->gtRegNum;
+        regSrc = srcOp->gtRegNum;
+        usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
+        bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
+        emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+        emitAttr srcType;

-        /* Is there a return value and/or an exit statement? */
+        if (oper == GT_COPYBLK)
+        {
+            // Prefer a low register, but avoid one of the ones we've already grabbed
+            regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+            usedRegs |= genRegMask(regTemp);
+            bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
+            srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ?
EA_BYREF : EA_PTRSIZE; + } + else + { + regTemp = REG_STK; + srcType = EA_PTRSIZE; + } - if (op1) + instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr + instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str + + int finalOffset; + + // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK? + if (!useLoop) { - if (op1->gtType == TYP_VOID) + for (unsigned i = 0; i < fullStoreCount; i++) { - //We're returning nothing, just generate the block (shared epilog calls). - genCodeForTree(op1, 0); - } -#ifdef _TARGET_ARM_ - else if (op1->gtType == TYP_STRUCT) - { - if (op1->gtOper == GT_CALL) + if (oper == GT_COPYBLK) { - // We have a return call() because we failed to tail call. - // In any case, just generate the call and be done. - assert(compiler->IsHfa(op1)); - genCodeForCall(op1, true); - genMarkTreeInReg(op1, REG_FLOATRET); + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + regTracker.rsTrackRegTrash(regTemp); } else { - assert(op1->gtOper == GT_LCL_VAR); - assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum))); - genLoadIntoFltRetRegs(op1); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE); } } - else if (op1->TypeGet() == TYP_FLOAT) - { - // This can only occur when we are returning a non-HFA struct - // that is composed of a single float field and we performed - // struct promotion and enregistered the float field. - // - genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG); - getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum); - } -#endif // _TARGET_ARM_ - else - { - //we can now go through this code for compiler->genReturnBB. I've regularized all the code. 
- //noway_assert(compiler->compCurBB != compiler->genReturnBB); - - noway_assert(op1->gtType != TYP_VOID); + finalOffset = fullStoreCount * TARGET_POINTER_SIZE; + length -= finalOffset; + } + else // We will use a loop to implement this INITBLK/COPYBLK + { + unsigned pairStoreLoopCount = fullStoreCount / 2; - /* Generate the return value into the return register */ + // We need a second temp register for CopyBlk + regNumber regTemp2 = REG_STK; + if (oper == GT_COPYBLK) + { + // Prefer a low register, but avoid one of the ones we've already grabbed + regTemp2 = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); + usedRegs |= genRegMask(regTemp2); + } - genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG); + // Pick and initialize the loop counter register + regNumber regLoopIndex; + regLoopIndex = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); + genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT); - /* The result must now be in the return register */ + // Create and define the Basic Block for the loop top + BasicBlock * loopTopBlock = genCreateTempLabel(); + genDefineTempLabel(loopTopBlock); - noway_assert(op1->gtFlags & GTF_REG_VAL); - noway_assert(op1->gtRegNum == REG_INTRET); + // The loop body + if (oper == GT_COPYBLK) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0); + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); + gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2)); + regTracker.rsTrackRegTrash(regSrc); + regTracker.rsTrackRegTrash(regTemp); + regTracker.rsTrackRegTrash(regTemp2); + } + else // GT_INITBLK + { + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE); } - /* The return value has now been computed */ + getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(regDst); + getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET); + emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED); + inst_JMP(jmpGTS, loopTopBlock); - reg = op1->gtRegNum; + regTracker.rsTrackRegIntCns(regLoopIndex, 0); - genCodeForTree_DONE(tree, reg); + length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE)); + if (length & TARGET_POINTER_SIZE) + { + if (oper == GT_COPYBLK) + { + getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0); + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0); + } + else + { + getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0); + } + finalOffset = TARGET_POINTER_SIZE; + length -= TARGET_POINTER_SIZE; + } + else + { + finalOffset = 0; + } } - //The profiling hook does not trash registers, so it's safe to call after we emit the code for - //the GT_RETURN tree. 
-#ifdef PROFILING_SUPPORTED
-    if (compiler->compCurBB == compiler->genReturnBB)
+        if (length & sizeof(short))
     {
-        genProfilingLeaveCallback();
-    }
-#endif
-#ifdef DEBUG
-    if (compiler->opts.compStackCheckOnRet)
-    {
-        noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
-                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
-                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
-        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+            loadIns = ins_Load(TYP_USHORT);   // INS_ldrh
+            storeIns = ins_Store(TYP_USHORT); // INS_strh

-        BasicBlock * esp_check = genCreateTempLabel();
-        emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
-        inst_JMP(jmpEqual, esp_check);
-        getEmitter()->emitIns(INS_BREAKPOINT);
-        genDefineTempLabel(esp_check);
+            if (oper == GT_COPYBLK)
+            {
+                getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
+                getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
+                gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+                regTracker.rsTrackRegTrash(regTemp);
+            }
+            else
+            {
+                getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
+            }
+            length -= sizeof(short);
+            finalOffset += sizeof(short);
         }
-#endif
-        return;

-    case GT_COMMA:
+        if (length & sizeof(char))
         {
-        if (tree->gtFlags & GTF_REVERSE_OPS)
+            loadIns = ins_Load(TYP_UBYTE);   // INS_ldrb
+            storeIns = ins_Store(TYP_UBYTE); // INS_strb
+
+            if (oper == GT_COPYBLK)
             {
-            if (tree->gtType == TYP_VOID)
+                getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
+                getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
+                gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+                regTracker.rsTrackRegTrash(regTemp);
+            }
+            else
+            {
+                getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
+            }
+            length -= sizeof(char);
+        }
+        assert(length == 0);

-            genEvalSideEffects(op2);
-            genUpdateLife (op2);
-            genEvalSideEffects(op1);
-            genUpdateLife(tree);
-            return;
+        genReleaseReg(dstOp);
+        genReleaseReg(srcOp);
+    }
+    else
+    {
+        //
+        // This is a variable-sized COPYBLK/INITBLK,
+        // or a fixed size INITBLK with a variable init value,
+        //

-            // Generate op2
-            genCodeForTree(op2, needReg);
-            genUpdateLife(op2);
+        // What order should the Dest, Val/Src, and Size be calculated in?

-            noway_assert(op2->gtFlags & GTF_REG_VAL);
+        compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2,
+                                  opsPtr, regsPtr); // OUT arguments

-            regSet.rsMarkRegUsed(op2);
+        genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
+        genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
+        genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);

-            // Do side effects of op1
-            genEvalSideEffects(op1);
+        genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+        genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);

-            // Recover op2 if spilled
-            genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);

-            regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));

-            // set gc info if we need so
-            gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
+        noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest
+                     (op1->gtOp.gtOp1->gtRegNum == REG_ARG_0));

-            genUpdateLife(tree);
-            genCodeForTree_DONE(tree, op2->gtRegNum);
+        noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src
+                     (op1->gtOp.gtOp2->gtRegNum == REG_ARG_1));
- genEvalSideEffects(op1); - genUpdateLife (op1); + noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size + (op2->gtRegNum == REG_ARG_2)); - /* Is the value of the second operand used? */ + regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2); - if (tree->gtType == TYP_VOID) - { - /* The right operand produces no result. The morpher is - responsible for resetting the type of GT_COMMA nodes - to TYP_VOID if op2 isn't meant to yield a result. */ + genEmitHelperCall(oper == GT_COPYBLK ? CORINFO_HELP_MEMCPY + /* GT_INITBLK */ : CORINFO_HELP_MEMSET, + 0, EA_UNKNOWN); - genEvalSideEffects(op2); - genUpdateLife(tree); - return; - } + regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH); - /* Generate the second operand, i.e. the 'real' value */ + regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2); + genReleaseReg(opsPtr[0]); + genReleaseReg(opsPtr[1]); + genReleaseReg(opsPtr[2]); + } - genCodeForTree(op2, needReg); - noway_assert(op2->gtFlags & GTF_REG_VAL); + if ((oper == GT_COPYBLK) && tree->AsBlkOp()->IsVolatile()) + { + // Emit a memory barrier instruction after the CopyBlk + instGen_MemoryBarrier(); + } +#endif // !CPU_USES_BLOCK_MOVE + } +} +BasicBlock dummyBB; - /* The result of 'op2' is also the final result */ +#ifdef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:21000) // Suppress PREFast warning about overly large function +#endif +void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, + regMaskTP destReg, + regMaskTP bestReg) +{ + const genTreeOps oper = tree->OperGet(); + const var_types treeType = tree->TypeGet(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + regNumber reg = DUMMY_INIT(REG_CORRUPT); + regMaskTP regs = regSet.rsMaskUsed; + regMaskTP needReg = destReg; + insFlags flags = tree->gtSetFlags() ? 
INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + emitAttr size; + instruction ins; + regMaskTP addrReg; + GenTreePtr opsPtr[3]; + regMaskTP regsPtr[3]; - reg = op2->gtRegNum; +#ifdef DEBUG + addrReg = 0xDEADCAFE; +#endif - /* Remember whether we set the flags */ + noway_assert(tree->OperKind() & GTK_SMPOP); - tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET); + switch (oper) + { + case GT_ASG: + genCodeForTreeSmpOpAsg(tree); + return; - genCodeForTree_DONE(tree, reg); - return; - } + case GT_ASG_LSH: + case GT_ASG_RSH: + case GT_ASG_RSZ: + genCodeForAsgShift(tree, destReg, bestReg); + return; - case GT_BOX: - genCodeForTree(op1, needReg); - noway_assert(op1->gtFlags & GTF_REG_VAL); + case GT_ASG_AND: + case GT_ASG_OR : + case GT_ASG_XOR: + case GT_ASG_ADD: + case GT_ASG_SUB: + genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg); + return; - /* The result of 'op1' is also the final result */ + case GT_CHS: + addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true); +#ifdef _TARGET_XARCH_ + // Note that the specialCase here occurs when the treeType specifies a byte sized operation + // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI) + // + bool specialCase; specialCase = false; + if (op1->gtOper == GT_REG_VAR) + { + /* Get hold of the target register */ - reg = op1->gtRegNum; + reg = op1->gtRegVar.gtRegNum; + if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); - /* Remember whether we set the flags */ + inst_RV_RV(INS_mov, byteReg, reg); + regTracker.rsTrackRegTrash(byteReg); - tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET); + inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType)); + var_types op1Type = op1->TypeGet(); + instruction wideningIns = ins_Move_Extend(op1Type, true); + inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type)); + regTracker.rsTrackRegTrash(reg); + specialCase = true; + } + } - genCodeForTree_DONE(tree, reg); + if (!specialCase) + { + inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType)); + } +#else // not _TARGET_XARCH_ + if (op1->gtFlags & GTF_REG_VAL) + { + inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags); + } + else + { + // Fix 388382 ARM JitStress WP7 + var_types op1Type = op1->TypeGet(); + regNumber reg = regSet.rsPickFreeReg(); + inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type)); + regTracker.rsTrackRegTrash(reg); + inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags); + inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type)); + } +#endif + if (op1->gtFlags & GTF_REG_VAL) + regTracker.rsTrackRegTrash(op1->gtRegNum); + genDoneAddressable(op1, addrReg, RegSet::KEEP_REG); + + genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false); return; - case GT_QMARK: + case GT_AND: + case GT_OR : + case GT_XOR: + case GT_ADD: + case GT_SUB: + case GT_MUL: + genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg); + return; - genCodeForQmark(tree, destReg, bestReg); + case GT_UMOD: + genCodeForUnsignedMod(tree, destReg, bestReg); return; - case GT_NOP: + case GT_MOD: + genCodeForSignedMod(tree, destReg, bestReg); + return; -#if OPT_BOOL_OPS - if (op1 == NULL) - return; -#endif + case GT_UDIV: + genCodeForUnsignedDiv(tree, destReg, bestReg); + return; - /* Generate the operand into some register */ + case GT_DIV: + genCodeForSignedDiv(tree, destReg, bestReg); + return; - genCodeForTree(op1, needReg); + case GT_LSH: + case GT_RSH: + case GT_RSZ: + genCodeForShift(tree, destReg, bestReg); 
+ return; - /* The result is the same as the operand */ + case GT_NEG: + case GT_NOT: - reg = op1->gtRegNum; + /* Generate the operand into some register */ - genCodeForTree_DONE(tree, reg); - return; + genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); - case GT_INTRINSIC: + reg = op1->gtRegNum; - switch (tree->gtIntrinsic.gtIntrinsicId) - { - case CORINFO_INTRINSIC_Round: - { - noway_assert(tree->gtType == TYP_INT); + /* Negate/reverse the value in the register */ -#if FEATURE_STACK_FP_X87 - genCodeForTreeFlt(op1); + inst_RV((oper == GT_NEG) ? INS_NEG + : INS_NOT, reg, treeType); - /* Store the FP value into the temp */ - TempDsc* temp = compiler->tmpGetTemp(TYP_INT); + /* The register is now trashed */ - FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum); - FlatFPX87_Kill(&compCurFPState, op1->gtRegNum); - inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0); + regTracker.rsTrackRegTrash(reg); - reg = regSet.rsPickReg(needReg, bestReg); - regTracker.rsTrackRegTrash(reg); + genCodeForTree_DONE(tree, reg); + return; - inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT); + case GT_IND: + case GT_NULLCHECK: // At this point, explicit null checks are just like inds... - compiler->tmpRlsTemp(temp); -#else - genCodeForTreeFloat(tree, needReg, bestReg); - return; -#endif - } - break; + /* Make sure the operand is addressable */ - default: - noway_assert(!"unexpected math intrinsic"); + addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true); - } + genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); - genCodeForTree_DONE(tree, reg); - return; + /* Figure out the size of the value being loaded */ - case GT_LCLHEAP: + size = EA_ATTR(genTypeSize(tree->gtType)); - reg = genLclHeap(op1); - genCodeForTree_DONE(tree, reg); - return; + /* Pick a register for the value */ - case GT_COPYOBJ: - noway_assert(op1->IsList()); + if (needReg == RBM_ALLINT && bestReg == 0) + { + /* Absent a better suggestion, pick a useless register */ - /* If the value class doesn't have any fields that are GC refs or - the target isn't on the GC-heap, we can merge it with CPBLK. - GC fields cannot be copied directly, instead we will - need to use a jit-helper for that. 
*/ - assert(tree->AsCpObj()->gtGcPtrCount > 0); + bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs()); + } + reg = regSet.rsPickReg(needReg, bestReg); + + if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL)) { - GenTreeCpObj* cpObjOp = tree->AsCpObj(); + noway_assert(size == EA_PTRSIZE); + getEmitter()->emitIns_R_C (ins_Load(TYP_I_IMPL), + EA_PTRSIZE, + reg, + FLD_GLOBAL_FS, + (int)op1->gtIntCon.gtIconVal); + } + else + { + /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */ + + inst_mov_RV_ST(reg, tree); + } #ifdef _TARGET_ARM_ - if (cpObjOp->IsVolatile()) - { - // Emit a memory barrier instruction before the CopyBlk - instGen_MemoryBarrier(); - } + if (tree->gtFlags & GTF_IND_VOLATILE) + { + // Emit a memory barrier instruction after the load + instGen_MemoryBarrier(); + } #endif - GenTreePtr srcObj = cpObjOp->Source(); - GenTreePtr dstObj = cpObjOp->Dest(); - noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL); + /* Note the new contents of the register we used */ -#ifdef DEBUG - CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal; - size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); + regTracker.rsTrackRegTrash(reg); - // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers. - // The EE currently does not allow this. Let's assert it just to be safe. - noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize); + /* Update the live set of register variables */ + +#ifdef DEBUG + if (compiler->opts.varNames) genUpdateLife(tree); #endif - size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE; - unsigned slots = cpObjOp->gtSlots; - BYTE * gcPtrs = cpObjOp->gtGcPtrs; - unsigned gcPtrCount = cpObjOp->gtGcPtrCount; + /* Now we can update the register pointer information */ - // If we have GC pointers then the GTF_BLK_HASGCPTR flags must be set - if (gcPtrCount > 0) - assert((tree->gtFlags & GTF_BLK_HASGCPTR) != 0); +// genDoneAddressable(tree, addrReg, RegSet::KEEP_REG); + gcInfo.gcMarkRegPtrVal(reg, treeType); - GenTreePtr treeFirst, treeSecond; - regNumber regFirst, regSecond; + genCodeForTree_DONE_LIFE(tree, reg); + return; - // Check what order the object-ptrs have to be evaluated in ? + case GT_CAST: - if (op1->gtFlags & GTF_REVERSE_OPS) - { - treeFirst = srcObj; - treeSecond = dstObj; -#if CPU_USES_BLOCK_MOVE - regFirst = REG_ESI; - regSecond = REG_EDI; -#else - regFirst = REG_ARG_1; - regSecond = REG_ARG_0; + genCodeForNumericCast(tree, destReg, bestReg); + return; + + + case GT_JTRUE: + + /* Is this a test of a relational operator? */ + + if (op1->OperIsCompare()) + { + /* Generate the conditional jump */ + + genCondJump(op1); + + genUpdateLife(tree); + return; + } + +#ifdef DEBUG + compiler->gtDispTree(tree); #endif - } - else - { - treeFirst = dstObj; - treeSecond = srcObj; -#if CPU_USES_BLOCK_MOVE - regFirst = REG_EDI; - regSecond = REG_ESI; + NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?"); + break; + + case GT_SWITCH: + genCodeForSwitch(tree); + return; + + case GT_RETFILT: + noway_assert(tree->gtType == TYP_VOID || op1 != 0); + if (op1 == 0) // endfinally + { + reg = REG_NA; + +#ifdef _TARGET_XARCH_ + /* Return using a pop-jmp sequence. As the "try" block calls + the finally with a jmp, this leaves the x86 call-ret stack + balanced in the normal flow of path. 
*/ + + noway_assert(isFramePointerRequired()); + inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL); + inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL); +#elif defined(_TARGET_ARM_) + // Nothing needed for ARM #else - regFirst = REG_ARG_0; - regSecond = REG_ARG_1; + NYI("TARGET"); #endif - } + } + else // endfilter + { + genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG); + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegNum == REG_INTRET); + /* The return value has now been computed */ + reg = op1->gtRegNum; - bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK)); - bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK)); - emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE; - emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE; + /* Return */ + instGen_Return(0); + } - // Materialize the trees in the order desired + genCodeForTree_DONE(tree, reg); + return; -#if CPU_USES_BLOCK_MOVE - genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true); - genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true); - genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG); + case GT_RETURN: - // Grab ECX because it will be trashed by the helper - // - regSet.rsGrabReg(RBM_ECX); +#if INLINE_NDIRECT - while (blkSize >= TARGET_POINTER_SIZE) + // TODO: this should be done AFTER we called exit mon so that + // we are sure that we don't have to keep 'this' alive + + if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB)) + { + /* either it's an "empty" statement or the return statement + of a synchronized method + */ + + genPInvokeMethodEpilog(); + } + +#endif + + /* Is there a return value and/or an exit statement? */ + + if (op1) + { + if (op1->gtType == TYP_VOID) { - if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack) + //We're returning nothing, just generate the block (shared epilog calls). + genCodeForTree(op1, 0); + } +#ifdef _TARGET_ARM_ + else if (op1->gtType == TYP_STRUCT) + { + if (op1->gtOper == GT_CALL) { - // Note that we can use movsd even if it is a GC pointer being transfered - // because the value is not cached anywhere. If we did this in two moves, - // we would have to make certain we passed the appropriate GC info on to - // the emitter. - instGen(INS_movsp); + // We have a return call() because we failed to tail call. + // In any case, just generate the call and be done. + assert(compiler->IsHfa(op1)); + genCodeForCall(op1, true); + genMarkTreeInReg(op1, REG_FLOATRET); } else { - // This helper will act like a MOVSD - // -- inputs EDI and ESI are byrefs - // -- including incrementing of ESI and EDI by 4 - // -- helper will trash ECX - // - regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond); - regSet.rsLockUsedReg(argRegs); - genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, - 0, // argSize - EA_PTRSIZE); // retSize - regSet.rsUnlockUsedReg(argRegs); + assert(op1->gtOper == GT_LCL_VAR); + assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum))); + genLoadIntoFltRetRegs(op1); } - - blkSize -= TARGET_POINTER_SIZE; } + else if (op1->TypeGet() == TYP_FLOAT) + { + // This can only occur when we are returning a non-HFA struct + // that is composed of a single float field and we performed + // struct promotion and enregistered the float field. 
+ // + genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG); + getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum); + } +#endif // _TARGET_ARM_ + else + { + //we can now go through this code for compiler->genReturnBB. I've regularized all the code. - // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers + //noway_assert(compiler->compCurBB != compiler->genReturnBB); + + noway_assert(op1->gtType != TYP_VOID); - regTracker.rsTrackRegTrash(REG_EDI); - regTracker.rsTrackRegTrash(REG_ESI); - regTracker.rsTrackRegTrash(REG_ECX); + /* Generate the return value into the return register */ - gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI); + genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG); - /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as - it is a emitNoGChelper. However, we have to let the emitter know that - the GC liveness has changed. We do this by creating a new label. - */ + /* The result must now be in the return register */ - noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF)); + noway_assert(op1->gtFlags & GTF_REG_VAL); + noway_assert(op1->gtRegNum == REG_INTRET); + } - genDefineTempLabel(&dummyBB); + /* The return value has now been computed */ -#else // !CPU_USES_BLOCK_MOVE + reg = op1->gtRegNum; -#ifndef _TARGET_ARM_ - // Currently only the ARM implementation is provided -#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE" -#endif + genCodeForTree_DONE(tree, reg); - bool helperUsed; - regNumber regDst; - regNumber regSrc; - regNumber regTemp; + } - if ((gcPtrCount > 0) && !dstIsOnStack) - { - genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true); - genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true); - genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG); + //The profiling hook does not trash registers, so it's safe to call after we emit the code for + //the GT_RETURN tree. 
+#ifdef PROFILING_SUPPORTED + if (compiler->compCurBB == compiler->genReturnBB) + { + genProfilingLeaveCallback(); + } +#endif +#ifdef DEBUG + if (compiler->opts.compStackCheckOnRet) + { + noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && + compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); + getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); - /* The helper is a Asm-routine that will trash R2,R3 and LR */ - { - /* Spill any callee-saved registers which are being used */ - regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed; + BasicBlock * esp_check = genCreateTempLabel(); + emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED); + inst_JMP(jmpEqual, esp_check); + getEmitter()->emitIns(INS_BREAKPOINT); + genDefineTempLabel(esp_check); + } +#endif + return; - if (spillRegs) - { - regSet.rsSpillRegs(spillRegs); - } - } + case GT_COMMA: - // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper - // We will also use it as the temp register for our load/store sequences - // - assert(REG_R2 == REG_TMP_1); - regTemp = regSet.rsGrabReg(RBM_R2); - helperUsed = true; - } - else + if (tree->gtFlags & GTF_REVERSE_OPS) + { + if (tree->gtType == TYP_VOID) { - genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG); - genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG); - genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG); - - // Grab any temp register to use for our load/store sequences - // - regTemp = regSet.rsGrabReg(RBM_ALLINT); - helperUsed = false; + genEvalSideEffects(op2); + genUpdateLife (op2); + genEvalSideEffects(op1); + genUpdateLife(tree); + return; } - assert(dstObj->gtFlags & GTF_REG_VAL); - assert(srcObj->gtFlags & GTF_REG_VAL); - - regDst = dstObj->gtRegNum; - regSrc = srcObj->gtRegNum; - assert(regDst != regTemp); - assert(regSrc != regTemp); - - instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr - instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str - - size_t offset = 0; - while (blkSize >= TARGET_POINTER_SIZE) - { - CorInfoGCType gcType; - CorInfoGCType gcTypeNext = TYPE_GC_NONE; - var_types type = TYP_I_IMPL; + // Generate op2 + genCodeForTree(op2, needReg); + genUpdateLife(op2); -#if FEATURE_WRITE_BARRIER - gcType = (CorInfoGCType)(*gcPtrs++); - if (blkSize > TARGET_POINTER_SIZE) - gcTypeNext = (CorInfoGCType)(*gcPtrs); + noway_assert(op2->gtFlags & GTF_REG_VAL); - if (gcType == TYPE_GC_REF) - type = TYP_REF; - else if (gcType == TYPE_GC_BYREF) - type = TYP_BYREF; + regSet.rsMarkRegUsed(op2); - if (helperUsed) - { - assert(regDst == REG_ARG_0); - assert(regSrc == REG_ARG_1); - assert(regTemp == REG_R2); - } -#else - gcType = TYPE_GC_NONE; -#endif // FEATURE_WRITE_BARRIER + // Do side effects of op1 + genEvalSideEffects(op1); - blkSize -= TARGET_POINTER_SIZE; + // Recover op2 if spilled + genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG); - emitAttr opSize = emitTypeSize(type); + regSet.rsMarkRegFree(genRegMask(op2->gtRegNum)); - if (!helperUsed || (gcType == TYPE_GC_NONE)) - { - getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset); - getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset); - offset += TARGET_POINTER_SIZE; + // set gc info if we need so + gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType); - if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || - ((offset >= 128) && (blkSize > 0))) - { - getEmitter()->emitIns_R_I(INS_add, srcType, 
regSrc, offset); - getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset); - offset = 0; - } - } - else - { - assert(offset == 0); + genUpdateLife(tree); + genCodeForTree_DONE(tree, op2->gtRegNum); - // The helper will act like this: - // -- inputs R0 and R1 are byrefs - // -- helper will perform copy from *R1 into *R0 - // -- helper will perform post increment of R0 and R1 by 4 - // -- helper will trash R2 - // -- helper will trash R3 - // -- calling the helper implicitly trashes LR - // - assert(helperUsed); - regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond); - regSet.rsLockUsedReg(argRegs); - genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, - 0, // argSize - EA_PTRSIZE); // retSize + return; + } + else + { + noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); - regSet.rsUnlockUsedReg(argRegs); - regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC); - } - } + /* Generate side effects of the first operand */ - regTracker.rsTrackRegTrash(regDst); - regTracker.rsTrackRegTrash(regSrc); - regTracker.rsTrackRegTrash(regTemp); + genEvalSideEffects(op1); + genUpdateLife (op1); - gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc)); + /* Is the value of the second operand used? */ - /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as - it is a emitNoGChelper. However, we have to let the emitter know that - the GC liveness has changed. We do this by creating a new label. - */ + if (tree->gtType == TYP_VOID) + { + /* The right operand produces no result. The morpher is + responsible for resetting the type of GT_COMMA nodes + to TYP_VOID if op2 isn't meant to yield a result. */ - noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF)); + genEvalSideEffects(op2); + genUpdateLife(tree); + return; + } - genDefineTempLabel(&dummyBB); + /* Generate the second operand, i.e. the 'real' value */ -#endif // !CPU_USES_BLOCK_MOVE + genCodeForTree(op2, needReg); + noway_assert(op2->gtFlags & GTF_REG_VAL); - assert(blkSize == 0); + /* The result of 'op2' is also the final result */ - genReleaseReg(dstObj); - genReleaseReg(srcObj); + reg = op2->gtRegNum; - reg = REG_NA; + /* Remember whether we set the flags */ - genCodeForTree_DONE(tree, reg); + tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET); -#ifdef _TARGET_ARM_ - if (tree->AsBlkOp()->IsVolatile()) - { - // Emit a memory barrier instruction after the CopyBlk - instGen_MemoryBarrier(); - } -#endif + genCodeForTree_DONE(tree, reg); + return; } - return; - - case GT_COPYBLK: - case GT_INITBLK: - noway_assert(oper == GT_COPYBLK || oper == GT_INITBLK); - noway_assert(op1->IsList()); + case GT_BOX: + genCodeForTree(op1, needReg); + noway_assert(op1->gtFlags & GTF_REG_VAL); -#ifdef _TARGET_ARM_ - if (tree->AsBlkOp()->IsVolatile()) - { - // Emit a memory barrier instruction before the InitBlk/CopyBlk - instGen_MemoryBarrier(); - } -#endif - { - GenTreePtr destPtr, srcPtrOrVal; - destPtr = op1->gtOp.gtOp1; - srcPtrOrVal = op1->gtOp.gtOp2; - noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet())); - noway_assert((oper == GT_COPYBLK && - (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) - || - (oper == GT_INITBLK && - varTypeIsIntegral(srcPtrOrVal->TypeGet()))); + /* The result of 'op1' is also the final result */ - noway_assert(op1 && op1->IsList()); - noway_assert(destPtr && srcPtrOrVal); + reg = op1->gtRegNum; -#if CPU_USES_BLOCK_MOVE - regs = (oper == GT_INITBLK) ? 
RBM_EAX : RBM_ESI; // What is the needReg for Val/Src + /* Remember whether we set the flags */ - /* Some special code for block moves/inits for constant sizes */ + tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET); - // - // Is this a fixed size COPYBLK? - // or a fixed size INITBLK with a constant init value? - // - if ((op2->IsCnsIntOrI()) && - ((oper == GT_COPYBLK) || (srcPtrOrVal->IsCnsIntOrI()))) - { - size_t length = (size_t)op2->gtIntCon.gtIconVal; - size_t initVal = 0; - instruction ins_P, ins_PR, ins_B; + genCodeForTree_DONE(tree, reg); + return; - if (oper == GT_INITBLK) - { - ins_P = INS_stosp; - ins_PR = INS_r_stosp; - ins_B = INS_stosb; + case GT_QMARK: - /* Properly extend the init constant from a U1 to a U4 */ - initVal = 0xFF & ((unsigned)op1->gtOp.gtOp2->gtIntCon.gtIconVal); + genCodeForQmark(tree, destReg, bestReg); + return; - /* If it is a non-zero value we have to replicate */ - /* the byte value four times to form the DWORD */ - /* Then we change this new value into the tree-node */ + case GT_NOP: - if (initVal) - { - initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24); -#ifdef _TARGET_64BIT_ - if (length > 4) - { - initVal = initVal | (initVal << 32); - op1->gtOp.gtOp2->gtType = TYP_LONG; - } - else - { - op1->gtOp.gtOp2->gtType = TYP_INT; - } -#endif // _TARGET_64BIT_ - } - op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal; - } - else - { - ins_P = INS_movsp; - ins_PR = INS_r_movsp; - ins_B = INS_movsb; - } +#if OPT_BOOL_OPS + if (op1 == NULL) + return; +#endif - // Determine if we will be using SSE2 - unsigned movqLenMin = 8; - unsigned movqLenMax = 24; + /* Generate the operand into some register */ - bool bWillUseSSE2 = false; - bool bWillUseOnlySSE2 = false; - bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there. 
+ genCodeForTree(op1, needReg); -#ifdef _TARGET_64BIT_ + /* The result is the same as the operand */ - // Until we get SSE2 instructions that move 16 bytes at a time instead of just 8 - // there is no point in wasting space on the bigger instructions + reg = op1->gtRegNum; -#else // !_TARGET_64BIT_ + genCodeForTree_DONE(tree, reg); + return; - if (compiler->opts.compCanUseSSE2) - { - unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler); + case GT_INTRINSIC: - /* Adjust for BB weight */ - if (curBBweight == BB_ZERO_WEIGHT) - { - // Don't bother with this optimization in - // rarely run blocks - movqLenMax = movqLenMin = 0; - } - else if (curBBweight < BB_UNITY_WEIGHT) - { - // Be less aggressive when we are inside a conditional - movqLenMax = 16; - } - else if (curBBweight >= (BB_LOOP_WEIGHT*BB_UNITY_WEIGHT) / 2) - { - // Be more aggressive when we are inside a loop - movqLenMax = 48; - } + switch (tree->gtIntrinsic.gtIntrinsicId) + { + case CORINFO_INTRINSIC_Round: + { + noway_assert(tree->gtType == TYP_INT); - if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || (oper == GT_INITBLK)) - { - // Be more aggressive when optimizing for speed - // InitBlk uses fewer instructions - movqLenMax += 16; - } +#if FEATURE_STACK_FP_X87 + genCodeForTreeFlt(op1); - if (compiler->compCodeOpt() != Compiler::SMALL_CODE && - length >= movqLenMin && - length <= movqLenMax) - { - bWillUseSSE2 = true; + /* Store the FP value into the temp */ + TempDsc* temp = compiler->tmpGetTemp(TYP_INT); - if ((length % 8) == 0) - { - bWillUseOnlySSE2 = true; - if (oper == GT_INITBLK && (initVal == 0)) - { - bNeedEvaluateCnst = false; - noway_assert((op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)); - } - } - } - } + FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum); + FlatFPX87_Kill(&compCurFPState, op1->gtRegNum); + inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0); -#endif // !_TARGET_64BIT_ + reg = regSet.rsPickReg(needReg, bestReg); + regTracker.rsTrackRegTrash(reg); - const bool bWillTrashRegSrc = ((oper == GT_COPYBLK) && !bWillUseOnlySSE2); - /* Evaluate dest and src/val */ + inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT); - if (op1->gtFlags & GTF_REVERSE_OPS) - { - if (bNeedEvaluateCnst) - { - genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc); - } - genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2); - if (bNeedEvaluateCnst) - { - genRecoverReg(op1->gtOp.gtOp2, regs, RegSet::KEEP_REG); - } - } - else - { - genComputeReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2); - if (bNeedEvaluateCnst) - { - genComputeReg(op1->gtOp.gtOp2, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc); - } - genRecoverReg(op1->gtOp.gtOp1, RBM_EDI, RegSet::KEEP_REG); - } + compiler->tmpRlsTemp(temp); +#else + genCodeForTreeFloat(tree, needReg, bestReg); + return; +#endif + } + break; - bool bTrashedESI = false; - bool bTrashedEDI = false; + default: + noway_assert(!"unexpected math intrinsic"); - if (bWillUseSSE2) - { - int blkDisp = 0; - regNumber xmmReg = REG_XMM0; + } - if (oper == GT_INITBLK) - { - if (initVal) - { - getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX); - getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg); - } - else - { - getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg); - } - } + genCodeForTree_DONE(tree, reg); + return; - JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n", - length, (oper == 
GT_INITBLK) ? "initblk" : "copyblk", compiler->info.compFullName)); + case GT_LCLHEAP: - while (length > 7) - { - if (oper == GT_INITBLK) - { - getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp); - } - else - { - getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp); - getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp); - } - blkDisp += 8; - length -= 8; - } + reg = genLclHeap(op1); + genCodeForTree_DONE(tree, reg); + return; - if (length > 0) - { - noway_assert(bNeedEvaluateCnst); - noway_assert(!bWillUseOnlySSE2); + case GT_COPYOBJ: + noway_assert(op1->IsList()); - if (oper == GT_COPYBLK) - { - inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet())); - bTrashedESI = true; - } + /* If the value class doesn't have any fields that are GC refs or + the target isn't on the GC-heap, we can merge it with CPBLK. + GC fields cannot be copied directly, instead we will + need to use a jit-helper for that. */ + assert(tree->AsCpObj()->gtGcPtrCount > 0); - inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet())); - bTrashedEDI = true; + { + GenTreeCpObj* cpObjOp = tree->AsCpObj(); - if (length >= REGSIZE_BYTES) - { - instGen(ins_P); - length -= REGSIZE_BYTES; - } - } - } - else if (compiler->compCodeOpt() == Compiler::SMALL_CODE) - { - /* For small code, we can only use ins_DR to generate fast - and small code. We also can't use "rep movsb" because - we may not atomically reading and writing the DWORD */ +#ifdef _TARGET_ARM_ + if (cpObjOp->IsVolatile()) + { + // Emit a memory barrier instruction before the CopyBlk + instGen_MemoryBarrier(); + } +#endif + GenTreePtr srcObj = cpObjOp->Source(); + GenTreePtr dstObj = cpObjOp->Dest(); - noway_assert(bNeedEvaluateCnst); + noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL); - goto USE_DR; - } - else if (length <= 4 * REGSIZE_BYTES) - { - noway_assert(bNeedEvaluateCnst); +#ifdef DEBUG + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal; + size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); - while (length >= REGSIZE_BYTES) - { - instGen(ins_P); - length -= REGSIZE_BYTES; - } + // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers. + // The EE currently does not allow this. Let's assert it just to be safe. + noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize); +#endif - bTrashedEDI = true; - if (oper == GT_COPYBLK) - bTrashedESI = true; - } - else - { - USE_DR: - noway_assert(bNeedEvaluateCnst); + size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE; + unsigned slots = cpObjOp->gtSlots; + BYTE * gcPtrs = cpObjOp->gtGcPtrs; + unsigned gcPtrCount = cpObjOp->gtGcPtrCount; + + // If we have GC pointers then the GTF_BLK_HASGCPTR flags must be set + if (gcPtrCount > 0) + assert((tree->gtFlags & GTF_BLK_HASGCPTR) != 0); - /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */ - genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL); + GenTreePtr treeFirst, treeSecond; + regNumber regFirst, regSecond; + + // Check what order the object-ptrs have to be evaluated in ? 
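The reverse-operands check that follows can be read as the sketch below, written with hypothetical stand-in types (the real code uses GenTreePtr and regNumber; on x86 the register pair is ESI/EDI and on ARM it is REG_ARG_1/REG_ARG_0, as in the hunk).

    #include <utility>

    // Stand-in for a (tree, target register) pairing; illustrative only.
    struct Operand { const char* tree; int reg; };

    // When GTF_REVERSE_OPS is set, the source object pointer must be
    // materialized into its register before the destination; otherwise
    // the destination is evaluated first.
    static std::pair<Operand, Operand> OrderObjPtrs(bool reverseOps,
                                                    Operand dst, Operand src)
    {
        if (reverseOps)
            return { src, dst };  // srcObj first, dstObj second
        return { dst, src };      // dstObj first, srcObj second
    }

Honoring the IR's evaluation order here matters because each operand is forced into an exact register; evaluating them out of order could clobber a register the other operand still needs.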
- length &= (REGSIZE_BYTES - 1); + if (op1->gtFlags & GTF_REVERSE_OPS) + { + treeFirst = srcObj; + treeSecond = dstObj; +#if CPU_USES_BLOCK_MOVE + regFirst = REG_ESI; + regSecond = REG_EDI; +#else + regFirst = REG_ARG_1; + regSecond = REG_ARG_0; +#endif + } + else + { + treeFirst = dstObj; + treeSecond = srcObj; +#if CPU_USES_BLOCK_MOVE + regFirst = REG_EDI; + regSecond = REG_ESI; +#else + regFirst = REG_ARG_0; + regSecond = REG_ARG_1; +#endif + } - instGen(ins_PR); + bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK)); + bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK)); + emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE; + emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE; - regTracker.rsTrackRegTrash(REG_ECX); + // Materialize the trees in the order desired - bTrashedEDI = true; - if (oper == GT_COPYBLK) - bTrashedESI = true; - } +#if CPU_USES_BLOCK_MOVE + genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG); - /* Now take care of the remainder */ + // Grab ECX because it will be trashed by the helper + // + regSet.rsGrabReg(RBM_ECX); -#ifdef _TARGET_64BIT_ - if (length > 4) + while (blkSize >= TARGET_POINTER_SIZE) + { + if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack) { - noway_assert(bNeedEvaluateCnst); - noway_assert(length < 8); - - instGen((oper == GT_INITBLK) ? INS_stosd : INS_movsd); - length -= 4; - - bTrashedEDI = true; - if (oper == GT_COPYBLK) - bTrashedESI = true; + // Note that we can use movsd even if it is a GC pointer being transfered + // because the value is not cached anywhere. If we did this in two moves, + // we would have to make certain we passed the appropriate GC info on to + // the emitter. 
+ instGen(INS_movsp); } - -#endif // _TARGET_64BIT_ - - if (length) + else { - noway_assert(bNeedEvaluateCnst); - - while (length--) - { - instGen(ins_B); - } - - bTrashedEDI = true; - if (oper == GT_COPYBLK) - bTrashedESI = true; + // This helper will act like a MOVSD + // -- inputs EDI and ESI are byrefs + // -- including incrementing of ESI and EDI by 4 + // -- helper will trash ECX + // + regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond); + regSet.rsLockUsedReg(argRegs); + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, + 0, // argSize + EA_PTRSIZE); // retSize + regSet.rsUnlockUsedReg(argRegs); } - noway_assert(bTrashedEDI == !bWillUseOnlySSE2); - if (bTrashedEDI) - regTracker.rsTrackRegTrash(REG_EDI); - if (bTrashedESI) - regTracker.rsTrackRegTrash(REG_ESI); - // else No need to trash EAX as it wasnt destroyed by the "rep stos" - - genReleaseReg(op1->gtOp.gtOp1); - if (bNeedEvaluateCnst) genReleaseReg(op1->gtOp.gtOp2); - + blkSize -= TARGET_POINTER_SIZE; } - else - { - // - // This a variable-sized COPYBLK/INITBLK, - // or a fixed size INITBLK with a variable init value, - // - - // What order should the Dest, Val/Src, and Size be calculated - compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, - opsPtr, regsPtr); // OUT arguments - - noway_assert(((oper == GT_INITBLK) && (regs == RBM_EAX)) || ((oper == GT_COPYBLK) && (regs == RBM_ESI))); - genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX)); - genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX)); - genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX)); - - genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG); - genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG); - - noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest - (op1->gtOp.gtOp1->gtRegNum == REG_EDI)); - - noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src - (genRegMask(op1->gtOp.gtOp2->gtRegNum) == regs)); + // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers - noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size - (op2->gtRegNum == REG_ECX)); + regTracker.rsTrackRegTrash(REG_EDI); + regTracker.rsTrackRegTrash(REG_ESI); + regTracker.rsTrackRegTrash(REG_ECX); - if (oper == GT_INITBLK) - instGen(INS_r_stosb); - else - instGen(INS_r_movsb); + gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI); - regTracker.rsTrackRegTrash(REG_EDI); - regTracker.rsTrackRegTrash(REG_ECX); + /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as + it is a emitNoGChelper. However, we have to let the emitter know that + the GC liveness has changed. We do this by creating a new label. + */ - if (oper == GT_COPYBLK) - regTracker.rsTrackRegTrash(REG_ESI); - // else No need to trash EAX as it wasnt destroyed by the "rep stos" + noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF)); - genReleaseReg(opsPtr[0]); - genReleaseReg(opsPtr[1]); - genReleaseReg(opsPtr[2]); - } + genDefineTempLabel(&dummyBB); -#else // !CPU_USES_BLOCK_MOVE +#else // !CPU_USES_BLOCK_MOVE #ifndef _TARGET_ARM_ // Currently only the ARM implementation is provided -#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE" +#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE" #endif - // - // Is this a fixed size COPYBLK? - // or a fixed size INITBLK with a constant init value? 
- // - if ((op2->OperGet() == GT_CNS_INT) && - ((oper == GT_COPYBLK) || (srcPtrOrVal->OperGet() == GT_CNS_INT))) + + bool helperUsed; + regNumber regDst; + regNumber regSrc; + regNumber regTemp; + + if ((gcPtrCount > 0) && !dstIsOnStack) { - GenTreePtr dstOp = op1->gtOp.gtOp1; - GenTreePtr srcOp = op1->gtOp.gtOp2; - unsigned length = (unsigned)op2->gtIntCon.gtIconVal; - unsigned fullStoreCount = length / TARGET_POINTER_SIZE; - unsigned initVal = 0; - bool useLoop = false; + genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true); + genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG); - if (oper == GT_INITBLK) + /* The helper is a Asm-routine that will trash R2,R3 and LR */ { - /* Properly extend the init constant from a U1 to a U4 */ - initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal); - - /* If it is a non-zero value we have to replicate */ - /* the byte value four times to form the DWORD */ - /* Then we store this new value into the tree-node */ + /* Spill any callee-saved registers which are being used */ + regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed; - if (initVal != 0) + if (spillRegs) { - initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24); - op1->gtOp.gtOp2->gtIntCon.gtIconVal = initVal; + regSet.rsSpillRegs(spillRegs); } } - // Will we be using a loop to implement this INITBLK/COPYBLK? - if (((oper == GT_COPYBLK) && (fullStoreCount >= 8)) || - ((oper == GT_INITBLK) && (fullStoreCount >= 16))) - { - useLoop = true; - } - - regMaskTP usedRegs; - regNumber regDst; - regNumber regSrc; - regNumber regTemp; + // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper + // We will also use it as the temp register for our load/store sequences + // + assert(REG_R2 == REG_TMP_1); + regTemp = regSet.rsGrabReg(RBM_R2); + helperUsed = true; + } + else + { + genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG); + genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG); + genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG); - /* Evaluate dest and src/val */ + // Grab any temp register to use for our load/store sequences + // + regTemp = regSet.rsGrabReg(RBM_ALLINT); + helperUsed = false; + } + assert(dstObj->gtFlags & GTF_REG_VAL); + assert(srcObj->gtFlags & GTF_REG_VAL); - if (op1->gtFlags & GTF_REVERSE_OPS) - { - genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); - assert(srcOp->gtFlags & GTF_REG_VAL); + regDst = dstObj->gtRegNum; + regSrc = srcObj->gtRegNum; - genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); - assert(dstOp->gtFlags & GTF_REG_VAL); - regDst = dstOp->gtRegNum; + assert(regDst != regTemp); + assert(regSrc != regTemp); - genRecoverReg(srcOp, needReg, RegSet::KEEP_REG); - regSrc = srcOp->gtRegNum; - } - else - { - genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); - assert(dstOp->gtFlags & GTF_REG_VAL); + instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr + instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str - genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop); - assert(srcOp->gtFlags & GTF_REG_VAL); - regSrc = srcOp->gtRegNum; + size_t offset = 0; + while (blkSize >= TARGET_POINTER_SIZE) + { + CorInfoGCType gcType; + CorInfoGCType gcTypeNext = TYPE_GC_NONE; + var_types type = TYP_I_IMPL; - 
genRecoverReg(dstOp, needReg, RegSet::KEEP_REG); - regDst = dstOp->gtRegNum; - } - assert(dstOp->gtFlags & GTF_REG_VAL); - assert(srcOp->gtFlags & GTF_REG_VAL); +#if FEATURE_WRITE_BARRIER + gcType = (CorInfoGCType)(*gcPtrs++); + if (blkSize > TARGET_POINTER_SIZE) + gcTypeNext = (CorInfoGCType)(*gcPtrs); - regDst = dstOp->gtRegNum; - regSrc = srcOp->gtRegNum; - usedRegs = (genRegMask(regSrc) | genRegMask(regDst)); - bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK)); - emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE; - emitAttr srcType; + if (gcType == TYPE_GC_REF) + type = TYP_REF; + else if (gcType == TYPE_GC_BYREF) + type = TYP_BYREF; - if (oper == GT_COPYBLK) - { - // Prefer a low register,but avoid one of the ones we've already grabbed - regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); - usedRegs |= genRegMask(regTemp); - bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK)); - srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE; - } - else + if (helperUsed) { - regTemp = REG_STK; - srcType = EA_PTRSIZE; + assert(regDst == REG_ARG_0); + assert(regSrc == REG_ARG_1); + assert(regTemp == REG_R2); } +#else + gcType = TYPE_GC_NONE; +#endif // FEATURE_WRITE_BARRIER - instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr - instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str - - int finalOffset; + blkSize -= TARGET_POINTER_SIZE; - // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK? - if (!useLoop) - { - for (unsigned i = 0; i < fullStoreCount; i++) - { - if (oper == GT_COPYBLK) - { - getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE); - getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE); - gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); - regTracker.rsTrackRegTrash(regTemp); - } - else - { - getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE); - } - } + emitAttr opSize = emitTypeSize(type); - finalOffset = fullStoreCount * TARGET_POINTER_SIZE; - length -= finalOffset; - } - else // We will use a loop to implement this INITBLK/COPYBLK + if (!helperUsed || (gcType == TYPE_GC_NONE)) { - unsigned pairStoreLoopCount = fullStoreCount / 2; - - // We need a second temp register for CopyBlk - regNumber regTemp2 = REG_STK; - if (oper == GT_COPYBLK) - { - // Prefer a low register, but avoid one of the ones we've already grabbed - regTemp2 = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); - usedRegs |= genRegMask(regTemp2); - } - - // Pick and initialize the loop counter register - regNumber regLoopIndex; - regLoopIndex = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS)); - genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT); - - // Create and define the Basic Block for the loop top - BasicBlock * loopTopBlock = genCreateTempLabel(); - genDefineTempLabel(loopTopBlock); - - // The loop body - if (oper == GT_COPYBLK) - { - getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0); - getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE); - getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0); - getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE); - getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * 
TARGET_POINTER_SIZE); - gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); - gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2)); - regTracker.rsTrackRegTrash(regSrc); - regTracker.rsTrackRegTrash(regTemp); - regTracker.rsTrackRegTrash(regTemp2); - } - else // GT_INITBLK - { - getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0); - getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE); - } - - getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE); - regTracker.rsTrackRegTrash(regDst); - getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET); - emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED); - inst_JMP(jmpGTS, loopTopBlock); - - regTracker.rsTrackRegIntCns(regLoopIndex, 0); - - length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE)); + getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset); + getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset); + offset += TARGET_POINTER_SIZE; - if (length & TARGET_POINTER_SIZE) - { - if (oper == GT_COPYBLK) - { - getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0); - getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0); - } - else - { - getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0); - } - finalOffset = TARGET_POINTER_SIZE; - length -= TARGET_POINTER_SIZE; - } - else + if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || + ((offset >= 128) && (blkSize > 0))) { - finalOffset = 0; + getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset); + getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset); + offset = 0; } } - - if (length & sizeof(short)) + else { - loadIns = ins_Load(TYP_USHORT); // INS_ldrh - storeIns = ins_Store(TYP_USHORT); // INS_strh - - if (oper == GT_COPYBLK) - { - getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset); - getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset); - gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); - regTracker.rsTrackRegTrash(regTemp); - } - else - { - getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset); - } - length -= sizeof(short); - finalOffset += sizeof(short); - } + assert(offset == 0); - if (length & sizeof(char)) - { - loadIns = ins_Load(TYP_UBYTE); // INS_ldrb - storeIns = ins_Store(TYP_UBYTE); // INS_strb + // The helper will act like this: + // -- inputs R0 and R1 are byrefs + // -- helper will perform copy from *R1 into *R0 + // -- helper will perform post increment of R0 and R1 by 4 + // -- helper will trash R2 + // -- helper will trash R3 + // -- calling the helper implicitly trashes LR + // + assert(helperUsed); + regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond); + regSet.rsLockUsedReg(argRegs); + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, + 0, // argSize + EA_PTRSIZE); // retSize - if (oper == GT_COPYBLK) - { - getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset); - getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset); - gcInfo.gcMarkRegSetNpt(genRegMask(regTemp)); - regTracker.rsTrackRegTrash(regTemp); - } - else - { - getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset); - } - length -= sizeof(char); + regSet.rsUnlockUsedReg(argRegs); + regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC); } - assert(length == 0); - - genReleaseReg(dstOp); - genReleaseReg(srcOp); } - else - { - // - // This a variable-sized COPYBLK/INITBLK, - // or a fixed size INITBLK with a 
variable init value, - // - - // What order should the Dest, Val/Src, and Size be calculated - compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, - opsPtr, regsPtr); // OUT arguments + regTracker.rsTrackRegTrash(regDst); + regTracker.rsTrackRegTrash(regSrc); + regTracker.rsTrackRegTrash(regTemp); - genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG); - genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG); - genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG); + gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc)); - genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG); - genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG); + /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as + it is a emitNoGChelper. However, we have to let the emitter know that + the GC liveness has changed. We do this by creating a new label. + */ - noway_assert((op1->gtOp.gtOp1->gtFlags & GTF_REG_VAL) && // Dest - (op1->gtOp.gtOp1->gtRegNum == REG_ARG_0)); + noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF)); - noway_assert((op1->gtOp.gtOp2->gtFlags & GTF_REG_VAL) && // Val/Src - (op1->gtOp.gtOp2->gtRegNum == REG_ARG_1)); + genDefineTempLabel(&dummyBB); - noway_assert((op2->gtFlags & GTF_REG_VAL) && // Size - (op2->gtRegNum == REG_ARG_2)); +#endif // !CPU_USES_BLOCK_MOVE - regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2); + assert(blkSize == 0); - genEmitHelperCall(oper == GT_COPYBLK ? CORINFO_HELP_MEMCPY - /* GT_INITBLK */ : CORINFO_HELP_MEMSET, - 0, EA_UNKNOWN); + genReleaseReg(dstObj); + genReleaseReg(srcObj); - regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH); + reg = REG_NA; - regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2); - genReleaseReg(opsPtr[0]); - genReleaseReg(opsPtr[1]); - genReleaseReg(opsPtr[2]); - } + genCodeForTree_DONE(tree, reg); - if ((oper == GT_COPYBLK) && tree->AsBlkOp()->IsVolatile()) +#ifdef _TARGET_ARM_ + if (tree->AsBlkOp()->IsVolatile()) { // Emit a memory barrier instruction after the CopyBlk instGen_MemoryBarrier(); } -#endif // !CPU_USES_BLOCK_MOVE - - reg = REG_NA; +#endif } + return; + + case GT_COPYBLK: + case GT_INITBLK: - genCodeForTree_DONE(tree, reg); + genCodeForBlkOp(tree, destReg); + genCodeForTree_DONE(tree, REG_NA); return; case GT_EQ: diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 9e82688..81dceb9 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -6008,6 +6008,11 @@ private: void rpPredictRefAssign (unsigned lclNum); + regMaskTP rpPredictBlkAsgRegUse(GenTreePtr tree, + rpPredictReg predictReg, + regMaskTP lockedRegs, + regMaskTP rsvdRegs); + regMaskTP rpPredictTreeRegUse (GenTreePtr tree, rpPredictReg predictReg, regMaskTP lockedRegs, diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index d09f123..010dded 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -1560,7 +1560,8 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd, if (sortFields) { // Sort the fields according to the increasing order of the field offset. - // This is needed because the fields need to be pushed on stack (for GT_LDOBJ) in order. + // This is needed because the fields need to be pushed on stack (when referenced + // as a struct) in order. 
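The sort invoked just below relies on an offset-ordering comparator. A qsort-compatible sketch is given here under the assumption that each entry exposes its field offset; FieldInfo is a hypothetical stand-in, since the element layout of StructPromotionInfo->fields is not shown in this diff.

    #include <cstdlib>

    struct FieldInfo { unsigned fldOffset; };  // hypothetical stand-in

    // Order promoted struct fields by increasing offset so they can be
    // pushed on the stack in order when the struct is referenced whole.
    static int FieldOffsetCompare(const void* a, const void* b)
    {
        unsigned lhs = static_cast<const FieldInfo*>(a)->fldOffset;
        unsigned rhs = static_cast<const FieldInfo*>(b)->fldOffset;
        return (lhs < rhs) ? -1 : ((lhs > rhs) ? 1 : 0);
    }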
qsort(StructPromotionInfo->fields, StructPromotionInfo->fieldCnt, sizeof(*StructPromotionInfo->fields), diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp index 9635cde..cfd0803 100644 --- a/src/jit/optcse.cpp +++ b/src/jit/optcse.cpp @@ -2128,7 +2128,7 @@ bool Compiler::optIsCSEcandidate(GenTreePtr tree) var_types type = tree->TypeGet(); genTreeOps oper = tree->OperGet(); - // TODO-1stClassStructs: Enable CSE for TYP_SIMD (depends on either transforming + // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming // to use regular assignments, or handling copyObj. if (varTypeIsStruct(type) || type == TYP_VOID) return false; diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp index 3a92f0f..68c072d 100644 --- a/src/jit/rationalize.cpp +++ b/src/jit/rationalize.cpp @@ -848,23 +848,18 @@ void Rationalizer::MorphAsgIntoStoreLcl(GenTreeStmt* stmt, GenTreePtr pTree) GenTreePtr lhs = pTree->gtGetOp1(); GenTreePtr rhs = pTree->gtGetOp2(); - assert(lhs->OperGet() == GT_LCL_VAR || - lhs->OperGet() == GT_LCL_FLD); + genTreeOps lhsOper = lhs->OperGet(); + genTreeOps storeOper; + assert(lhsOper == GT_LCL_VAR || lhsOper == GT_LCL_FLD); + + storeOper = storeForm(lhsOper); #ifdef DEBUG - if (lhs->OperGet() == GT_LCL_VAR) - { - JITDUMP("rewriting GT_ASG(GT_LCL_VAR, X) to GT_STORE_LCL_VAR(X)\n"); - } - else - { - assert(lhs->OperGet() == GT_LCL_FLD); - JITDUMP("rewriting GT_ASG(GT_LCL_FLD, X) to GT_STORE_LCL_FLD(X)\n"); - } + JITDUMP("rewriting asg(%s, X) to %s(X)\n", GenTree::NodeName(lhsOper), GenTree::NodeName(storeOper)); #endif // DEBUG GenTreeLclVarCommon* var = lhs->AsLclVarCommon(); - pTree->SetOper(storeForm(var->OperGet())); + pTree->SetOper(storeOper); GenTreeLclVarCommon* dst = pTree->AsLclVarCommon(); dst->SetLclNum(var->gtLclNum); dst->SetSsaNum(var->gtSsaNum); diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp index 89f2955..867a0ac 100644 --- a/src/jit/regalloc.cpp +++ b/src/jit/regalloc.cpp @@ -1647,6 +1647,230 @@ void Compiler::rpPredictRefAssign(unsigned lclNum) #endif // NOGC_WRITE_BARRIERS } +/***************************************************************************** + * + * Predict the internal temp physical register usage for a block assignment tree, + * by setting tree->gtUsedRegs. + * Records the internal temp physical register usage for this tree. + * Returns a mask of interfering registers for this tree. + * + * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs + * to the set of scratch registers needed when evaluating the tree. + * Generally tree->gtUsedRegs and the return value retMask are the same, except when the + * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we + * predict additional internal temp physical registers to spill into. + * + * tree - is the child of a GT_IND node + * predictReg - what type of register does the tree need + * lockedRegs - are the registers which are currently held by a previously evaluated node. + * Don't modify lockedRegs as it is used at the end to compute a spill mask. + * rsvdRegs - registers which should not be allocated because they will + * be needed to evaluate a node in the future + * - Also, if rsvdRegs has the RBM_LASTUSE bit set then + * the rpLastUseVars set should be saved and restored + * so that we don't add any new variables to rpLastUseVars. 
+ */ +regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree, + rpPredictReg predictReg, + regMaskTP lockedRegs, + regMaskTP rsvdRegs) +{ + regMaskTP regMask = RBM_NONE; + regMaskTP interferingRegs = RBM_NONE; + + bool hasGCpointer = false; + bool dstIsOnStack = false; + bool useMemHelper = false; + bool useBarriers = false; + + GenTreeBlkOp* blkNode = tree->AsBlkOp(); + GenTreePtr dstAddr = blkNode->Dest(); + GenTreePtr op1 = blkNode->gtGetOp1(); + GenTreePtr srcAddrOrFill = op1->gtGetOp2(); + GenTreePtr sizeNode = blkNode->gtGetOp2(); + + size_t blkSize = 0; + + hasGCpointer = ((tree->gtFlags & GTF_BLK_HASGCPTR) != 0); + + bool isCopyBlk = tree->OperIsCopyBlkOp(); + bool isCopyObj = (tree->OperGet() == GT_COPYOBJ); + bool isInitBlk = (tree->OperGet() == GT_INITBLK); + + if (sizeNode->OperGet() == GT_CNS_INT) + { + if (sizeNode->IsIconHandle(GTF_ICON_CLASS_HDL)) + { + if (isCopyObj) + { + dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK)); + } + + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE) sizeNode->gtIntCon.gtIconVal; + blkSize = roundUp(info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); + } + else // gtIconVal contains amount to copy + { + blkSize = (unsigned) sizeNode->gtIntCon.gtIconVal; + } + + if (isInitBlk) + { + if (srcAddrOrFill->OperGet() != GT_CNS_INT) + { + useMemHelper = true; + } + } + } + else + { + useMemHelper = true; + } + + if (hasGCpointer && !dstIsOnStack) + { + useBarriers = true; + } + +#ifdef _TARGET_ARM_ + // + // On ARM For COPYBLK & INITBLK we have special treatment for constant lengths. + // + if (!useMemHelper && !useBarriers) + { + bool useLoop = false; + unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE; + + // A mask to use to force the predictor to choose low registers (to reduce code size) + regMaskTP avoidReg = (RBM_R12|RBM_LR); + + // Allow the src and dst to be used in place, unless we use a loop, in which + // case we will need scratch registers as we will be writing to them. + rpPredictReg srcAndDstPredict = PREDICT_REG; + + // Will we be using a loop to implement this INITBLK/COPYBLK? 
+ if ((isCopyBlk && (fullStoreCount >= 8)) || + (isInitBlk && (fullStoreCount >= 16))) + { + useLoop = true; + avoidReg = RBM_NONE; + srcAndDstPredict = PREDICT_SCRATCH_REG; + } + + if (op1->gtFlags & GTF_REVERSE_OPS) + { + regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs, dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE); + regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg); + } + else + { + regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs, srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE); + regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg); + } + + // We need at least one scratch register for a copyBlk + if (isCopyBlk) + { + // Pick a low register to reduce the code size + regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg); + } + + if (useLoop) + { + if (isCopyBlk) + { + // We need a second temp register for a copyBlk (our code gen is load two/store two) + // Pick another low register to reduce the code size + regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg); + } + + // We need a loop index register + regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask); + } + + tree->gtUsedRegs = dstAddr->gtUsedRegs | + srcAddrOrFill->gtUsedRegs | + (regMaskSmall)regMask; + + return interferingRegs; + } +#endif + // What order should the Dest, Val/Src, and Size be calculated + GenTreePtr opsPtr [3]; + regMaskTP regsPtr[3]; + +#if defined(_TARGET_XARCH_) + fgOrderBlockOps(tree, + RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, + opsPtr, regsPtr); + + // We're going to use these, might as well make them available now + + codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX); + if (isCopyBlk) + codeGen->regSet.rsSetRegsModified(RBM_ESI); + +#elif defined(_TARGET_ARM_) + + if (useMemHelper) + { + // For all other cases that involve non-constants, we just call memcpy/memset + // JIT helpers + fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); + interferingRegs |= RBM_CALLEE_TRASH; +#ifdef DEBUG + if (verbose) + printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n"); +#endif + } + else // useBarriers + { + assert(useBarriers); + assert(isCopyBlk); + + fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr); + + // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper + interferingRegs |= RBM_CALLEE_TRASH_NOGC; +#ifdef DEBUG + if (verbose) + printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n"); +#endif + } +#else // !_TARGET_X86_ && !_TARGET_ARM_ +#error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK" +#endif // !_TARGET_X86_ && !_TARGET_ARM_ + regMask |= rpPredictTreeRegUse(opsPtr[0], + rpGetPredictForMask(regsPtr[0]), + lockedRegs, + opsPtr[1]->gtRsvdRegs | opsPtr[2]->gtRsvdRegs | RBM_LASTUSE); + regMask |= regsPtr[0]; + opsPtr[0]->gtUsedRegs |= regsPtr[0]; + rpRecordRegIntf(regsPtr[0], compCurLife + DEBUGARG("movsd dest")); + + regMask |= rpPredictTreeRegUse(opsPtr[1], + rpGetPredictForMask(regsPtr[1]), + lockedRegs | regMask, + opsPtr[2]->gtRsvdRegs | RBM_LASTUSE); + regMask |= regsPtr[1]; + opsPtr[1]->gtUsedRegs |= regsPtr[1]; + rpRecordRegIntf(regsPtr[1], compCurLife + DEBUGARG("movsd src")); + + regMask |= rpPredictTreeRegUse(opsPtr[2], + rpGetPredictForMask(regsPtr[2]), + lockedRegs | regMask, + RBM_NONE); + regMask |= regsPtr[2]; + opsPtr[2]->gtUsedRegs |= 
regsPtr[2]; + + tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | + opsPtr[1]->gtUsedRegs | + opsPtr[2]->gtUsedRegs | + (regMaskSmall)regMask; + return interferingRegs; +} /***************************************************************************** * @@ -4121,195 +4345,9 @@ HANDLE_SHIFT_COUNT: case GT_COPYOBJ: case GT_COPYBLK: case GT_INITBLK: - { - regMask = 0; - - bool hasGCpointer; hasGCpointer = false; - bool dstIsOnStack; dstIsOnStack = false; - bool useMemHelper; useMemHelper = false; - bool useBarriers; useBarriers = false; - - size_t blkSize; blkSize = 0; - - hasGCpointer = ((tree->gtFlags & GTF_BLK_HASGCPTR) != 0); - - if (op2->OperGet() == GT_CNS_INT) - { - if (op2->IsIconHandle(GTF_ICON_CLASS_HDL)) - { - if (tree->OperGet() == GT_COPYOBJ) - { - GenTreePtr dstObj = op1->gtOp.gtOp1; - dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK)); - } - - CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE) op2->gtIntCon.gtIconVal; - blkSize = roundUp(info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE); - } - else // gtIconVal contains amount to copy - { - blkSize = (unsigned) op2->gtIntCon.gtIconVal; - } - - if (tree->OperGet() == GT_INITBLK) - { - GenTreePtr initVal = op1->gtOp.gtOp2; - if (initVal->OperGet() != GT_CNS_INT) - { - useMemHelper = true; - } - } - } - else - { - useMemHelper = true; - } - - // If we are copying any GC pointers then the GTF_BLK_HASGCPTR flags must be set - if (hasGCpointer && !dstIsOnStack) - { - useBarriers = true; - } - -#ifdef _TARGET_ARM_ - // - // On ARM For COPYBLK & INITBLK we have special treatment for constant lengths. - // - if (!useMemHelper && !useBarriers) - { - bool useLoop = false; - unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE; - - // A mask to use to force the predictor to choose low registers (to reduce code size) - regMaskTP avoidReg = (RBM_R12|RBM_LR); - - // Allow the src and dst to be used in place, unless we use a loop, in which - // case we will need scratch registers as we will be writing to them. - rpPredictReg srcAndDstPredict = PREDICT_REG; - - // Will we be using a loop to implement this INITBLK/COPYBLK? 
- if ((GenTree::OperIsCopyBlkOp(oper) && (fullStoreCount >= 8)) || - ((oper == GT_INITBLK) && (fullStoreCount >= 16))) - { - useLoop = true; - avoidReg = RBM_NONE; - srcAndDstPredict = PREDICT_SCRATCH_REG; - } - - if (op1->gtFlags & GTF_REVERSE_OPS) - { - regMask |= rpPredictTreeRegUse(op1->gtOp.gtOp2, srcAndDstPredict, lockedRegs, op1->gtOp.gtOp1->gtRsvdRegs | avoidReg | RBM_LASTUSE); - regMask |= rpPredictTreeRegUse(op1->gtOp.gtOp1, srcAndDstPredict, lockedRegs | regMask, avoidReg); - } - else - { - regMask |= rpPredictTreeRegUse(op1->gtOp.gtOp1, srcAndDstPredict, lockedRegs, op1->gtOp.gtOp2->gtRsvdRegs | avoidReg | RBM_LASTUSE); - regMask |= rpPredictTreeRegUse(op1->gtOp.gtOp2, srcAndDstPredict, lockedRegs | regMask, avoidReg); - } - - // We need at least one scratch register for a GT_COPYBLK - if (GenTree::OperIsCopyBlkOp(oper)) - { - // Pick a low register to reduce the code size - regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg); - } - - if (useLoop) - { - if (GenTree::OperIsCopyBlkOp(oper)) - { - // We need a second temp register for a GT_COPYBLK (our code gen is load two/store two) - // Pick another low register to reduce the code size - regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg); - } - - // We need a loop index register - regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask); - } - - tree->gtUsedRegs = op1->gtOp.gtOp1->gtUsedRegs | - op1->gtOp.gtOp2->gtUsedRegs | - (regMaskSmall)regMask; - - regMask = 0; - goto RETURN_CHECK; - } -#endif - // What order should the Dest, Val/Src, and Size be calculated - -#if defined(_TARGET_XARCH_) - fgOrderBlockOps(tree, - RBM_EDI, (oper == GT_INITBLK) ? RBM_EAX : RBM_ESI, RBM_ECX, - opsPtr, regsPtr); - - // We're going to use these, might as well make them available now - - codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX); - if (GenTree::OperIsCopyBlkOp(oper)) - codeGen->regSet.rsSetRegsModified(RBM_ESI); - -#elif defined(_TARGET_ARM_) - - if (useMemHelper) - { - // For all other cases that involve non-constants, we just call memcpy/memset - // JIT helpers - fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); - interferingRegs |= RBM_CALLEE_TRASH; -#ifdef DEBUG - if (verbose) - printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n"); -#endif - } - else // useBarriers - { - assert(useBarriers); - assert(GenTree::OperIsCopyBlkOp(oper)); - - fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr); - - // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper - interferingRegs |= RBM_CALLEE_TRASH_NOGC; -#ifdef DEBUG - if (verbose) - printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n"); -#endif - } -#else // !_TARGET_X86_ && !_TARGET_ARM_ -#error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK" -#endif // !_TARGET_X86_ && !_TARGET_ARM_ - regMask |= rpPredictTreeRegUse(opsPtr[0], - rpGetPredictForMask(regsPtr[0]), - lockedRegs, - opsPtr[1]->gtRsvdRegs | opsPtr[2]->gtRsvdRegs | RBM_LASTUSE); - regMask |= regsPtr[0]; - opsPtr[0]->gtUsedRegs |= regsPtr[0]; - rpRecordRegIntf(regsPtr[0], compCurLife - DEBUGARG("movsd dest")); - - regMask |= rpPredictTreeRegUse(opsPtr[1], - rpGetPredictForMask(regsPtr[1]), - lockedRegs | regMask, - opsPtr[2]->gtRsvdRegs | RBM_LASTUSE); - regMask |= regsPtr[1]; - opsPtr[1]->gtUsedRegs |= regsPtr[1]; - rpRecordRegIntf(regsPtr[1], compCurLife - DEBUGARG("movsd src")); - - regMask |= 
rpPredictTreeRegUse(opsPtr[2], - rpGetPredictForMask(regsPtr[2]), - lockedRegs | regMask, - RBM_NONE); - regMask |= regsPtr[2]; - opsPtr[2]->gtUsedRegs |= regsPtr[2]; - - tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | - opsPtr[1]->gtUsedRegs | - opsPtr[2]->gtUsedRegs | - (regMaskSmall)regMask; + interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg,lockedRegs,rsvdRegs); regMask = 0; goto RETURN_CHECK; - } case GT_OBJ: {
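Taken together with the rationalizer hunk above, the assignment-to-store rewrite reduces to a small operator mapping. The sketch below models it with an illustrative enum rather than the real gtOper values; StoreForm is a stand-in for the storeForm helper the diff calls.

    #include <cassert>

    enum Oper { LCL_VAR, LCL_FLD, STORE_LCL_VAR, STORE_LCL_FLD };

    // GT_ASG(GT_LCL_VAR, X) becomes GT_STORE_LCL_VAR(X), and
    // GT_ASG(GT_LCL_FLD, X) becomes GT_STORE_LCL_FLD(X).
    static Oper StoreForm(Oper lhsOper)
    {
        assert(lhsOper == LCL_VAR || lhsOper == LCL_FLD);
        return (lhsOper == LCL_VAR) ? STORE_LCL_VAR : STORE_LCL_FLD;
    }

Collapsing the two hand-written cases into this table-like mapping is what lets the rewritten MorphAsgIntoStoreLcl emit a single JITDUMP line instead of duplicating the debug output per operator.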