From: Mike Danes Date: Mon, 9 May 2016 22:01:12 +0000 (+0300) Subject: Optimize integer div/mod by const power of 2 in lowering X-Git-Tag: accepted/tizen/base/20180629.140029~4224^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=babfca85887cc455ece64766c4614d494a3419b8;p=platform%2Fupstream%2Fcoreclr.git Optimize integer div/mod by const power of 2 in lowering Optimizing GT_DIV/GT_UDIV/GT_MOD/GT_UMOD by power of 2 in codegen is problematic because the xarch DIV instruction has special register requirements. By the time codegen decides to perform the optimization the rax and rdx registers have been already allocated by LSRA even though they're not always needed (as it happens in the case of unsigned division where CDQ isn't used). Since the JIT can't represent a CDQ instruction in its IR an arithmetic shift (GT_RSH) has been instead to extract the dividend sign. xarch's SAR is larger than CDQ but it has the advantage that it doesn't require specific registers. Also, arithmetic shifts are available on architectures other than xarch. Example: method "static int foo(int x) => x / 8;" is now compiled to mov eax, ecx mov edx, eax sar edx, 31 and edx, 7 add edx, eax mov eax, edx sar eax, 3 instead of mov eax, ecx cdq and edx, 7 add eax, edx sar eax, 3 As a side-effect of this change the optimization now also works when the divisor is too large to be contained. Previously this wasn't possible because the divisor constant needed to be modified during codegen but the constant was already loaded into a register. Example: method "static ulong foo(ulong x) => x / 4294967296;" is now compiled to mov rax, rcx shr rax, 32 whereas before a DIV instruction was used. This change also fixes an issue in fgShouldUseMagicNumberDivide. The optimization that is done in lower can handle negative power of 2 divisors but fgShouldUseMagicNumberDivide handled those cases because it didn't check the absolute value of the divisor. Example: method "static int foo(int x) => return x / -2;" is now compiled to mov eax, ecx mov edx, eax shr edx, 31 add edx, eax sar edx, 1 mov eax, edx neg eax instead of mov eax, 0x7FFFFFFF imul edx:eax, ecx mov eax, edx sub eax, ecx mov edx, eax shr edx, 31 add eax, edx --- diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index b690695..4f15fe3 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -3365,118 +3365,6 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) } } - -// Generate code for division (or mod) by power of two -// or negative powers of two. (meaning -1 * a power of two, not 2^(-1)) -// Op2 must be a contained integer constant. -void -CodeGen::genCodeForPow2Div(GenTreeOp* tree) -{ -#if 0 - GenTree *dividend = tree->gtOp.gtOp1; - GenTree *divisor = tree->gtOp.gtOp2; - genTreeOps oper = tree->OperGet(); - emitAttr size = emitTypeSize(tree); - emitter *emit = getEmitter(); - regNumber targetReg = tree->gtRegNum; - var_types targetType = tree->TypeGet(); - - bool isSigned = oper == GT_MOD || oper == GT_DIV; - - // precondition: extended dividend is in RDX:RAX - // which means it is either all zeros or all ones - - noway_assert(divisor->isContained()); - GenTreeIntConCommon* divImm = divisor->AsIntConCommon(); - int64_t imm = divImm->IconValue(); - ssize_t abs_imm = abs(imm); - noway_assert(isPow2(abs_imm)); - - - if (isSigned) - { - if (imm == 1) - { - if (targetReg != REG_RAX) - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); - - return; - } - - if (abs_imm == 2) - { - if (oper == GT_MOD) - { - emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1 - // xor with rdx will flip all bits if negative - emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0 - } - else - { - assert(oper == GT_DIV); - // add 1 if it's negative - emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX); - } - } - else - { - // add imm-1 if negative - emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1); - emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX); - } - - if (oper == GT_DIV) - { - unsigned shiftAmount = genLog2(unsigned(abs_imm)); - inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount); - - if (imm < 0) - { - emit->emitIns_R(INS_neg, size, REG_RAX); - } - } - else - { - assert(oper == GT_MOD); - if (abs_imm > 2) - { - emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1); - } - // RDX contains 'imm-1' if negative - emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX); - } - - if (targetReg != REG_RAX) - { - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); - } - } - else - { - assert (imm > 0); - - if (targetReg != dividend->gtRegNum) - { - inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType); - } - - if (oper == GT_UDIV) - { - inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm))); - } - else - { - assert(oper == GT_UMOD); - - emit->emitIns_R_I(INS_and, size, targetReg, imm -1); - } - } -#else // !0 - NYI("genCodeForPow2Div"); -#endif // !0 -} - - /*********************************************************************************************** * Generate code for localloc */ diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 3c96fbe..6ffccbf 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -20,8 +20,6 @@ void genCodeForMulHi(GenTreeOp* treeNode); - void genCodeForPow2Div(GenTreeOp* treeNode); - void genLeaInstruction(GenTreeAddrMode *lea); void genSetRegToCond(regNumber dstReg, GenTreePtr tree); diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index a2bdb38..84803f0 100755 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1280,42 +1280,30 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) gcInfo.gcMarkRegSetNpt(RBM_RDX); } - if (divisor->isContainedIntOrIImmed()) - { - GenTreeIntConCommon* divImm = divisor->AsIntConCommon(); - assert(divImm->IsIntCnsFitsInI32()); - ssize_t imm = divImm->IconValue(); - assert(isPow2(abs(imm))); - genCodeForPow2Div(treeNode->AsOp()); - } + // Perform the 'targetType' (64-bit or 32-bit) divide instruction + instruction ins; + if (oper == GT_UMOD || oper == GT_UDIV) + ins = INS_div; else - { - // Perform the 'targetType' (64-bit or 32-bit) divide instruction - instruction ins; - if (oper == GT_UMOD || oper == GT_UDIV) - ins = INS_div; - else - ins = INS_idiv; + ins = INS_idiv; - emit->emitInsBinary(ins, size, treeNode, divisor); + emit->emitInsBinary(ins, size, treeNode, divisor); - // Signed divide RDX:RAX by r/m64, with result - // stored in RAX := Quotient, RDX := Remainder. - // Move the result to the desired register, if necessary - if (oper == GT_DIV || oper == GT_UDIV) + // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX. + // Move the result to the desired register, if necessary + if (oper == GT_DIV || oper == GT_UDIV) + { + if (targetReg != REG_RAX) { - if (targetReg != REG_RAX) - { - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); - } + inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); } - else + } + else + { + assert((oper == GT_MOD) || (oper == GT_UMOD)); + if (targetReg != REG_RDX) { - assert((oper == GT_MOD) || (oper == GT_UMOD)); - if (targetReg != REG_RDX) - { - inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); - } + inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); } } } @@ -2888,120 +2876,6 @@ CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode) #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING } -// Generate code for division (or mod) by power of two -// or negative powers of two. (meaning -1 * a power of two, not 2^(-1)) -// Op2 must be a contained integer constant. -void -CodeGen::genCodeForPow2Div(GenTreeOp* tree) -{ - GenTree *dividend = tree->gtOp.gtOp1; - GenTree *divisor = tree->gtOp.gtOp2; - genTreeOps oper = tree->OperGet(); - emitAttr size = emitTypeSize(tree); - emitter *emit = getEmitter(); - regNumber targetReg = tree->gtRegNum; - var_types targetType = tree->TypeGet(); - - bool isSigned = oper == GT_MOD || oper == GT_DIV; - - // precondition: extended dividend is in RDX:RAX - // which means it is either all zeros or all ones - - noway_assert(divisor->isContained()); - GenTreeIntConCommon* divImm = divisor->AsIntConCommon(); - ssize_t imm = divImm->IconValue(); - ssize_t abs_imm = abs(imm); - noway_assert(isPow2(abs_imm)); - - - if (isSigned) - { - if (imm == 1) - { - if (oper == GT_DIV) - { - if (targetReg != REG_RAX) - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); - } - else - { - assert(oper == GT_MOD); - instGen_Set_Reg_To_Zero(size, targetReg); - } - - return; - } - - if (abs_imm == 2) - { - if (oper == GT_MOD) - { - emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1 - // xor with rdx will flip all bits if negative - emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0 - } - else - { - assert(oper == GT_DIV); - // add 1 if it's negative - emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX); - } - } - else - { - // add imm-1 if negative - emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1); - emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX); - } - - if (oper == GT_DIV) - { - unsigned shiftAmount = genLog2(unsigned(abs_imm)); - inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount); - - if (imm < 0) - { - emit->emitIns_R(INS_neg, size, REG_RAX); - } - } - else - { - assert(oper == GT_MOD); - if (abs_imm > 2) - { - emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1); - } - // RDX contains 'imm-1' if negative - emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX); - } - - if (targetReg != REG_RAX) - { - inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); - } - } - else - { - assert (imm > 0); - - if (targetReg != dividend->gtRegNum) - { - inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType); - } - - if (oper == GT_UDIV) - { - inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm))); - } - else - { - assert(oper == GT_UMOD); - - emit->emitIns_R_I(INS_and, size, targetReg, imm -1); - } - } -} - /*********************************************************************************************** * Generate code for localloc diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 060a00e..6fd97cb 100755 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -864,6 +864,78 @@ GenTreePtr Lowering::CreateLocalTempAsg(GenTreePtr rhs, return store; } +//----------------------------------------------------------------------------------------------- +// CreateTemporary: Store the result of the given tree in a newly created temporary local +// variable and replace the original use of the tree with the temporary. +// +// Arguments: +// ppTree - a pointer to the tree use to replace. +// +// Return Value: +// The newly created store statement. +// +// Assumptions: +// This may only be called during tree lowering. The callee must ensure that the tree has already +// been lowered and is part of compCurStmt and that compCurStmt is in compCurBB. +// +// Notes: +// The newly created statement is usually an embedded statement but it can also be a top-level +// statement if the tree to be replaced extends to the begining of the current statement. If +// a top-level statement is created any embedded statements contained in the tree move to the +// the new top-level statement, before the current statement. Such embedded statements need to +// be lowered here because the normal lowering code path won't reach them anymore. +// +// TODO-Cleanup: +// Some uses of fgInsertEmbeddedFormTemp in lowering could be replaced with this to avoid +// duplication, see LowerArrElem for example. + +GenTreeStmt* Lowering::CreateTemporary(GenTree** ppTree) +{ + GenTreeStmt* newStmt = comp->fgInsertEmbeddedFormTemp(ppTree); + + // The tree is assumed to be already lowered so the newly created statement + // should not be lowered again. + newStmt->gtFlags |= GTF_STMT_SKIP_LOWER; + + assert(newStmt->gtStmtExpr->OperIsLocalStore()); + + // If the newly created statement is top-level then we need to manually lower its embedded + // statements, the tree is lowered but some of its embedded statements are yet to be lowered. + if (newStmt->gtStmtIsTopLevel()) + { + GenTree* curStmt = comp->compCurStmt; + + for (GenTree* nextEmbeddedStmt = newStmt->gtStmtNextIfEmbedded(); + nextEmbeddedStmt != nullptr; + nextEmbeddedStmt = nextEmbeddedStmt->gtStmt.gtStmtNextIfEmbedded()) + { + // A previous call to CreateTemporary could have created embedded statements + // from the tree and those are already lowered. + if ((nextEmbeddedStmt->gtFlags & GTF_STMT_SKIP_LOWER) != 0) + continue; + +#ifdef DEBUG + if (comp->verbose) + { + printf("Lowering BB%02u, stmt id %u\n", currBlock->bbNum, nextEmbeddedStmt->gtTreeID); + } +#endif + comp->compCurStmt = nextEmbeddedStmt; + comp->fgWalkTreePost(&nextEmbeddedStmt->gtStmt.gtStmtExpr, &Lowering::LowerNodeHelper, this, true); + nextEmbeddedStmt->gtFlags |= GTF_STMT_SKIP_LOWER; + + // Lowering can remove the statement and set compCurStmt to another suitable statement. + // Currently only switch lowering does this and since embedded statements can't contain + // a GT_SWITCH this case should never be hit here. + assert(comp->compCurStmt == nextEmbeddedStmt); + } + + comp->compCurStmt = curStmt; + } + + return newStmt; +} + // This is the main entry point for Lowering. // In addition to that, LowerNode is also responsible for initializing the @@ -897,6 +969,16 @@ void Lowering::LowerNode(GenTreePtr* ppTree, Compiler::fgWalkData* data) LowerAdd(ppTree, data); break; + case GT_UDIV: + case GT_UMOD: + LowerUnsignedDivOrMod(*ppTree); + break; + + case GT_DIV: + case GT_MOD: + LowerSignedDivOrMod(ppTree, data); + break; + case GT_SWITCH: LowerSwitch(ppTree); break; @@ -3761,6 +3843,198 @@ void Lowering::LowerAdd(GenTreePtr* pTree, Compiler::fgWalkData* data) #endif // !_TARGET_ARMARCH_ } +//------------------------------------------------------------------------ +// LowerUnsignedDivOrMod: transform GT_UDIV/GT_UMOD nodes with a const power of 2 +// divisor into GT_RSZ/GT_AND nodes. +// +// Arguments: +// tree: pointer to the GT_UDIV/GT_UMOD node to be lowered + +void Lowering::LowerUnsignedDivOrMod(GenTree* tree) +{ + assert(tree->OperGet() == GT_UDIV || tree->OperGet() == GT_UMOD); + + GenTree* divisor = tree->gtGetOp2(); + + if (divisor->IsCnsIntOrI()) + { + size_t divisorValue = static_cast(divisor->gtIntCon.IconValue()); + + if (isPow2(divisorValue)) + { + genTreeOps newOper; + + if (tree->OperGet() == GT_UDIV) + { + newOper = GT_RSZ; + divisorValue = genLog2(divisorValue); + } + else + { + newOper = GT_AND; + divisorValue -= 1; + } + + tree->SetOper(newOper); + divisor->gtIntCon.SetIconValue(divisorValue); + } + } +} + +//------------------------------------------------------------------------ +// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2 +// const divisor into equivalent but faster sequences. +// +// Arguments: +// pTree: pointer to the parent node's link to the node we care about + +void Lowering::LowerSignedDivOrMod(GenTreePtr* ppTree, Compiler::fgWalkData* data) +{ + GenTree* divMod = *ppTree; + assert(divMod->OperGet() == GT_DIV || divMod->OperGet() == GT_MOD); + GenTree* divisor = divMod->gtGetOp2(); + + if (divisor->IsCnsIntOrI()) + { + const var_types type = divMod->TypeGet(); + assert(type == TYP_INT || type == TYP_LONG); + + GenTree* dividend = divMod->gtGetOp1(); + + if (dividend->IsCnsIntOrI()) + { + // We shouldn't see a divmod with constant operands here but if we do then it's likely + // because optimizations are disabled or it's a case that's supposed to throw an exception. + // Don't optimize this. + return; + } + + ssize_t divisorValue = divisor->gtIntCon.IconValue(); + + if (divisorValue == -1) + { + // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception. + + // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is + // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this + // case so optimizing this case would break C# code. + + // A runtime check could be used to handle this case but it's probably too rare to matter. + return; + } + + bool isDiv = divMod->OperGet() == GT_DIV; + + if (isDiv) + { + if ((type == TYP_INT && divisorValue == INT_MIN) || + (type == TYP_LONG && divisorValue == INT64_MIN)) + { + // If the divisor is the minimum representable integer value then we can use a compare, + // the result is 1 iff the dividend equals divisor. + divMod->SetOper(GT_EQ); + return; + } + } + + size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast(divisorValue) : static_cast(abs(divisorValue)); + + if (isPow2(absDivisorValue)) + { + // We need to use the dividend node multiple times so its value needs to be + // computed once and stored in a temp variable. + CreateTemporary(&(divMod->gtOp.gtOp1)); + dividend = divMod->gtGetOp1(); + + GenTreeStmt* curStmt = comp->compCurStmt->AsStmt(); + unsigned curBBWeight = currBlock->getBBWeight(comp); + unsigned dividendLclNum = dividend->gtLclVar.gtLclNum; + + GenTree* adjustment = comp->gtNewOperNode( + GT_RSH, type, + dividend, + comp->gtNewIconNode(type == TYP_INT ? 31 : 63)); + + if (absDivisorValue == 2) + { + // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1. + // We can get the same result by using GT_RSZ instead of GT_RSH. + adjustment->SetOper(GT_RSZ); + } + else + { + adjustment = comp->gtNewOperNode( + GT_AND, type, + adjustment, + comp->gtNewIconNode(absDivisorValue - 1, type)); + } + + GenTree* adjustedDividend = comp->gtNewOperNode( + GT_ADD, type, + adjustment, + comp->gtNewLclvNode(dividendLclNum, type)); + + comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp); + + GenTree* newDivMod; + + if (isDiv) + { + // perform the division by right shifting the adjusted dividend + divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue)); + + newDivMod = comp->gtNewOperNode( + GT_RSH, type, + adjustedDividend, + divisor); + + if (divisorValue < 0) + { + // negate the result if the divisor is negative + newDivMod = comp->gtNewOperNode( + GT_NEG, type, + newDivMod); + } + } + else + { + // divisor % dividend = dividend - divisor x (dividend / divisor) + // divisor x (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor) + // which simply discards the low log2(divisor) bits, that's just dividend & ~(divisor - 1) + divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1)); + + newDivMod = comp->gtNewOperNode( + GT_SUB, type, + comp->gtNewLclvNode(dividendLclNum, type), + comp->gtNewOperNode( + GT_AND, type, + adjustedDividend, + divisor)); + + comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp); + } + + // Remove the divisor and dividend nodes from the linear order, + // since we have reused them and will resequence the tree + comp->fgSnipNode(curStmt, divisor); + comp->fgSnipNode(curStmt, dividend); + + // linearize and insert the new tree before the original divMod node + comp->gtSetEvalOrder(newDivMod); + comp->fgSetTreeSeq(newDivMod); + comp->fgInsertTreeInListBefore(newDivMod, divMod, curStmt); + comp->fgSnipNode(curStmt, divMod); + + // the divMod that we've replaced could have been a call arg + comp->fgFixupIfCallArg(data->parentStack, divMod, newDivMod); + + // replace the original divmod node with the new divmod tree + *ppTree = newDivMod; + + return; + } + } +} //------------------------------------------------------------------------ // LowerInd: attempt to transform indirected expression into an addressing mode @@ -4040,9 +4314,6 @@ void Lowering::DoPhase() comp->lvaPromoteLongVars(); #endif // !defined(_TARGET_64BIT_) -#ifdef DEBUG - unsigned stmtNum = 0; -#endif for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext) { GenTreePtr stmt; @@ -4062,11 +4333,9 @@ void Lowering::DoPhase() continue; } #ifdef DEBUG - ++stmtNum; if (comp->verbose) { - // This is a useful location for a conditional breakpoint in Visual Studio (i.e. when stmtNum == 15) - printf("Decomposing BB%02u, stmt %u\n", block->bbNum, stmtNum); + printf("Decomposing BB%02u, stmt id %u\n", block->bbNum, stmt->gtTreeID); } #endif comp->compCurStmt = stmt; @@ -4074,9 +4343,6 @@ void Lowering::DoPhase() } #endif //!_TARGET_64BIT_ -#ifdef DEBUG - stmtNum = 0; -#endif // Walk the statement trees in this basic block for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) { @@ -4085,11 +4351,9 @@ void Lowering::DoPhase() continue; } #ifdef DEBUG - ++stmtNum; - if (comp->verbose) + if (comp->verbose) { - // This is a useful location for a conditional breakpoint in Visual Studio (i.e. when stmtNum == 15) - printf("Lowering BB%02u, stmt %u\n", block->bbNum, stmtNum); + printf("Lowering BB%02u, stmt id %u\n", block->bbNum, stmt->gtTreeID); } #endif comp->compCurStmt = stmt; diff --git a/src/jit/lower.h b/src/jit/lower.h index 6cd5e54..e9a3e65 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -165,6 +165,8 @@ private: void LowerInd(GenTreePtr* ppTree); void LowerAddrMode(GenTreePtr* ppTree, GenTree* before, Compiler::fgWalkData* data, bool isIndir); void LowerAdd(GenTreePtr* ppTree, Compiler::fgWalkData* data); + void LowerUnsignedDivOrMod(GenTree* tree); + void LowerSignedDivOrMod(GenTreePtr* ppTree, Compiler::fgWalkData* data); // Remove the nodes that are no longer used after an addressing mode is constructed under a GT_IND void LowerIndCleanupHelper(GenTreeAddrMode* addrMode, GenTreePtr tree); @@ -201,7 +203,7 @@ private: static bool NodesAreEquivalentLeaves (GenTreePtr candidate, GenTreePtr storeInd); GenTreePtr CreateLocalTempAsg (GenTreePtr rhs, unsigned refCount, GenTreePtr *ppLclVar = nullptr); - + GenTreeStmt* CreateTemporary (GenTree** ppTree); bool AreSourcesPossiblyModified (GenTree* use, GenTree* src1, GenTree *src2); void ReplaceNode (GenTree** ppTreeLocation, GenTree* replacementNode, diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp index 0aaa0b9..ad0ae9d 100644 --- a/src/jit/lowerarm64.cpp +++ b/src/jit/lowerarm64.cpp @@ -367,7 +367,6 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) case GT_MULHI: case GT_UDIV: { - // TODO-ARM64-CQ: Optimize a divide by power of 2 as we do for AMD64 info->srcCount = 2; info->dstCount = 1; } diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 64abf1e..8cd9d43 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -575,31 +575,6 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) op1 = tree->gtOp.gtOp1; op2 = tree->gtOp.gtOp2; - // See if we have an optimizable power of 2 which will be expanded - // using instructions other than division. - // (fgMorph has already done magic number transforms) - - if (op2->IsIntCnsFitsInI32()) - { - bool isSigned = tree->OperGet() == GT_MOD || tree->OperGet() == GT_DIV; - ssize_t amount = op2->gtIntConCommon.IconValue(); - - if (isPow2(abs(amount)) && (isSigned || amount > 0) - && amount != -1) - { - MakeSrcContained(tree, op2); - - if (isSigned) - { - // we are going to use CDQ instruction so want these RDX:RAX - info->setDstCandidates(l, RBM_RAX); - // If possible would like to have op1 in RAX to avoid a register move - op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX); - } - break; - } - } - // Amd64 Div/Idiv instruction: // Dividend in RAX:RDX and computes // Quotient in RAX, Remainder in RDX diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index a80640c..a3fb8bd 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -12752,7 +12752,7 @@ bool Compiler::fgShouldUseMagicNumberDivide(GenTreeOp* tree) return false; // codegen will expand these - if (isPow2(cons)) + if (cons == SSIZE_T_MIN || isPow2(abs(cons))) return false; // someone else will fold this away, so don't make it complicated for them