}
}
-
-// Generate code for division (or mod) by power of two
-// or negative powers of two. (meaning -1 * a power of two, not 2^(-1))
-// Op2 must be a contained integer constant.
-void
-CodeGen::genCodeForPow2Div(GenTreeOp* tree)
-{
-#if 0
- GenTree *dividend = tree->gtOp.gtOp1;
- GenTree *divisor = tree->gtOp.gtOp2;
- genTreeOps oper = tree->OperGet();
- emitAttr size = emitTypeSize(tree);
- emitter *emit = getEmitter();
- regNumber targetReg = tree->gtRegNum;
- var_types targetType = tree->TypeGet();
-
- bool isSigned = oper == GT_MOD || oper == GT_DIV;
-
- // precondition: extended dividend is in RDX:RAX
- // which means it is either all zeros or all ones
-
- noway_assert(divisor->isContained());
- GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
- int64_t imm = divImm->IconValue();
- ssize_t abs_imm = abs(imm);
- noway_assert(isPow2(abs_imm));
-
-
- if (isSigned)
- {
- if (imm == 1)
- {
- if (targetReg != REG_RAX)
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
-
- return;
- }
-
- if (abs_imm == 2)
- {
- if (oper == GT_MOD)
- {
- emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1
- // xor with rdx will flip all bits if negative
- emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0
- }
- else
- {
- assert(oper == GT_DIV);
- // add 1 if it's negative
- emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
- }
- }
- else
- {
- // add imm-1 if negative
- emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1);
- emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX);
- }
-
- if (oper == GT_DIV)
- {
- unsigned shiftAmount = genLog2(unsigned(abs_imm));
- inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount);
-
- if (imm < 0)
- {
- emit->emitIns_R(INS_neg, size, REG_RAX);
- }
- }
- else
- {
- assert(oper == GT_MOD);
- if (abs_imm > 2)
- {
- emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1);
- }
- // RDX contains 'imm-1' if negative
- emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
- }
-
- if (targetReg != REG_RAX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
- }
- }
- else
- {
- assert (imm > 0);
-
- if (targetReg != dividend->gtRegNum)
- {
- inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType);
- }
-
- if (oper == GT_UDIV)
- {
- inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm)));
- }
- else
- {
- assert(oper == GT_UMOD);
-
- emit->emitIns_R_I(INS_and, size, targetReg, imm -1);
- }
- }
-#else // !0
- NYI("genCodeForPow2Div");
-#endif // !0
-}
-
-
/***********************************************************************************************
* Generate code for localloc
*/
void genCodeForMulHi(GenTreeOp* treeNode);
- void genCodeForPow2Div(GenTreeOp* treeNode);
-
void genLeaInstruction(GenTreeAddrMode *lea);
void genSetRegToCond(regNumber dstReg, GenTreePtr tree);
gcInfo.gcMarkRegSetNpt(RBM_RDX);
}
- if (divisor->isContainedIntOrIImmed())
- {
- GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
- assert(divImm->IsIntCnsFitsInI32());
- ssize_t imm = divImm->IconValue();
- assert(isPow2(abs(imm)));
- genCodeForPow2Div(treeNode->AsOp());
- }
+ // Perform the 'targetType' (64-bit or 32-bit) divide instruction
+ instruction ins;
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ ins = INS_div;
else
- {
- // Perform the 'targetType' (64-bit or 32-bit) divide instruction
- instruction ins;
- if (oper == GT_UMOD || oper == GT_UDIV)
- ins = INS_div;
- else
- ins = INS_idiv;
+ ins = INS_idiv;
- emit->emitInsBinary(ins, size, treeNode, divisor);
+ emit->emitInsBinary(ins, size, treeNode, divisor);
- // Signed divide RDX:RAX by r/m64, with result
- // stored in RAX := Quotient, RDX := Remainder.
- // Move the result to the desired register, if necessary
- if (oper == GT_DIV || oper == GT_UDIV)
+ // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
+ // Move the result to the desired register, if necessary
+ if (oper == GT_DIV || oper == GT_UDIV)
+ {
+ if (targetReg != REG_RAX)
{
- if (targetReg != REG_RAX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
- }
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
}
- else
+ }
+ else
+ {
+ assert((oper == GT_MOD) || (oper == GT_UMOD));
+ if (targetReg != REG_RDX)
{
- assert((oper == GT_MOD) || (oper == GT_UMOD));
- if (targetReg != REG_RDX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
- }
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
}
}
}
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}
-// Generate code for division (or mod) by power of two
-// or negative powers of two. (meaning -1 * a power of two, not 2^(-1))
-// Op2 must be a contained integer constant.
-void
-CodeGen::genCodeForPow2Div(GenTreeOp* tree)
-{
- GenTree *dividend = tree->gtOp.gtOp1;
- GenTree *divisor = tree->gtOp.gtOp2;
- genTreeOps oper = tree->OperGet();
- emitAttr size = emitTypeSize(tree);
- emitter *emit = getEmitter();
- regNumber targetReg = tree->gtRegNum;
- var_types targetType = tree->TypeGet();
-
- bool isSigned = oper == GT_MOD || oper == GT_DIV;
-
- // precondition: extended dividend is in RDX:RAX
- // which means it is either all zeros or all ones
-
- noway_assert(divisor->isContained());
- GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
- ssize_t imm = divImm->IconValue();
- ssize_t abs_imm = abs(imm);
- noway_assert(isPow2(abs_imm));
-
-
- if (isSigned)
- {
- if (imm == 1)
- {
- if (oper == GT_DIV)
- {
- if (targetReg != REG_RAX)
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
- }
- else
- {
- assert(oper == GT_MOD);
- instGen_Set_Reg_To_Zero(size, targetReg);
- }
-
- return;
- }
-
- if (abs_imm == 2)
- {
- if (oper == GT_MOD)
- {
- emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1
- // xor with rdx will flip all bits if negative
- emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0
- }
- else
- {
- assert(oper == GT_DIV);
- // add 1 if it's negative
- emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
- }
- }
- else
- {
- // add imm-1 if negative
- emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1);
- emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX);
- }
-
- if (oper == GT_DIV)
- {
- unsigned shiftAmount = genLog2(unsigned(abs_imm));
- inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount);
-
- if (imm < 0)
- {
- emit->emitIns_R(INS_neg, size, REG_RAX);
- }
- }
- else
- {
- assert(oper == GT_MOD);
- if (abs_imm > 2)
- {
- emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1);
- }
- // RDX contains 'imm-1' if negative
- emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
- }
-
- if (targetReg != REG_RAX)
- {
- inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
- }
- }
- else
- {
- assert (imm > 0);
-
- if (targetReg != dividend->gtRegNum)
- {
- inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType);
- }
-
- if (oper == GT_UDIV)
- {
- inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm)));
- }
- else
- {
- assert(oper == GT_UMOD);
-
- emit->emitIns_R_I(INS_and, size, targetReg, imm -1);
- }
- }
-}
-
/***********************************************************************************************
* Generate code for localloc
return store;
}
+//-----------------------------------------------------------------------------------------------
+// CreateTemporary: Store the result of the given tree in a newly created temporary local
+// variable and replace the original use of the tree with the temporary.
+//
+// Arguments:
+// ppTree - a pointer to the tree use to replace.
+//
+// Return Value:
+// The newly created store statement.
+//
+// Assumptions:
+// This may only be called during tree lowering. The callee must ensure that the tree has already
+// been lowered and is part of compCurStmt and that compCurStmt is in compCurBB.
+//
+// Notes:
+// The newly created statement is usually an embedded statement but it can also be a top-level
+// statement if the tree to be replaced extends to the begining of the current statement. If
+// a top-level statement is created any embedded statements contained in the tree move to the
+// the new top-level statement, before the current statement. Such embedded statements need to
+// be lowered here because the normal lowering code path won't reach them anymore.
+//
+// TODO-Cleanup:
+// Some uses of fgInsertEmbeddedFormTemp in lowering could be replaced with this to avoid
+// duplication, see LowerArrElem for example.
+
+GenTreeStmt* Lowering::CreateTemporary(GenTree** ppTree)
+{
+ GenTreeStmt* newStmt = comp->fgInsertEmbeddedFormTemp(ppTree);
+
+ // The tree is assumed to be already lowered so the newly created statement
+ // should not be lowered again.
+ newStmt->gtFlags |= GTF_STMT_SKIP_LOWER;
+
+ assert(newStmt->gtStmtExpr->OperIsLocalStore());
+
+ // If the newly created statement is top-level then we need to manually lower its embedded
+ // statements, the tree is lowered but some of its embedded statements are yet to be lowered.
+ if (newStmt->gtStmtIsTopLevel())
+ {
+ GenTree* curStmt = comp->compCurStmt;
+
+ for (GenTree* nextEmbeddedStmt = newStmt->gtStmtNextIfEmbedded();
+ nextEmbeddedStmt != nullptr;
+ nextEmbeddedStmt = nextEmbeddedStmt->gtStmt.gtStmtNextIfEmbedded())
+ {
+ // A previous call to CreateTemporary could have created embedded statements
+ // from the tree and those are already lowered.
+ if ((nextEmbeddedStmt->gtFlags & GTF_STMT_SKIP_LOWER) != 0)
+ continue;
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("Lowering BB%02u, stmt id %u\n", currBlock->bbNum, nextEmbeddedStmt->gtTreeID);
+ }
+#endif
+ comp->compCurStmt = nextEmbeddedStmt;
+ comp->fgWalkTreePost(&nextEmbeddedStmt->gtStmt.gtStmtExpr, &Lowering::LowerNodeHelper, this, true);
+ nextEmbeddedStmt->gtFlags |= GTF_STMT_SKIP_LOWER;
+
+ // Lowering can remove the statement and set compCurStmt to another suitable statement.
+ // Currently only switch lowering does this and since embedded statements can't contain
+ // a GT_SWITCH this case should never be hit here.
+ assert(comp->compCurStmt == nextEmbeddedStmt);
+ }
+
+ comp->compCurStmt = curStmt;
+ }
+
+ return newStmt;
+}
+
// This is the main entry point for Lowering.
// In addition to that, LowerNode is also responsible for initializing the
LowerAdd(ppTree, data);
break;
+ case GT_UDIV:
+ case GT_UMOD:
+ LowerUnsignedDivOrMod(*ppTree);
+ break;
+
+ case GT_DIV:
+ case GT_MOD:
+ LowerSignedDivOrMod(ppTree, data);
+ break;
+
case GT_SWITCH:
LowerSwitch(ppTree);
break;
#endif // !_TARGET_ARMARCH_
}
+//------------------------------------------------------------------------
+// LowerUnsignedDivOrMod: transform GT_UDIV/GT_UMOD nodes with a const power of 2
+// divisor into GT_RSZ/GT_AND nodes.
+//
+// Arguments:
+// tree: pointer to the GT_UDIV/GT_UMOD node to be lowered
+
+void Lowering::LowerUnsignedDivOrMod(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_UDIV || tree->OperGet() == GT_UMOD);
+
+ GenTree* divisor = tree->gtGetOp2();
+
+ if (divisor->IsCnsIntOrI())
+ {
+ size_t divisorValue = static_cast<size_t>(divisor->gtIntCon.IconValue());
+
+ if (isPow2(divisorValue))
+ {
+ genTreeOps newOper;
+
+ if (tree->OperGet() == GT_UDIV)
+ {
+ newOper = GT_RSZ;
+ divisorValue = genLog2(divisorValue);
+ }
+ else
+ {
+ newOper = GT_AND;
+ divisorValue -= 1;
+ }
+
+ tree->SetOper(newOper);
+ divisor->gtIntCon.SetIconValue(divisorValue);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
+// const divisor into equivalent but faster sequences.
+//
+// Arguments:
+// pTree: pointer to the parent node's link to the node we care about
+
+void Lowering::LowerSignedDivOrMod(GenTreePtr* ppTree, Compiler::fgWalkData* data)
+{
+ GenTree* divMod = *ppTree;
+ assert(divMod->OperGet() == GT_DIV || divMod->OperGet() == GT_MOD);
+ GenTree* divisor = divMod->gtGetOp2();
+
+ if (divisor->IsCnsIntOrI())
+ {
+ const var_types type = divMod->TypeGet();
+ assert(type == TYP_INT || type == TYP_LONG);
+
+ GenTree* dividend = divMod->gtGetOp1();
+
+ if (dividend->IsCnsIntOrI())
+ {
+ // We shouldn't see a divmod with constant operands here but if we do then it's likely
+ // because optimizations are disabled or it's a case that's supposed to throw an exception.
+ // Don't optimize this.
+ return;
+ }
+
+ ssize_t divisorValue = divisor->gtIntCon.IconValue();
+
+ if (divisorValue == -1)
+ {
+ // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
+
+ // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is
+ // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this
+ // case so optimizing this case would break C# code.
+
+ // A runtime check could be used to handle this case but it's probably too rare to matter.
+ return;
+ }
+
+ bool isDiv = divMod->OperGet() == GT_DIV;
+
+ if (isDiv)
+ {
+ if ((type == TYP_INT && divisorValue == INT_MIN) ||
+ (type == TYP_LONG && divisorValue == INT64_MIN))
+ {
+ // If the divisor is the minimum representable integer value then we can use a compare,
+ // the result is 1 iff the dividend equals divisor.
+ divMod->SetOper(GT_EQ);
+ return;
+ }
+ }
+
+ size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue));
+
+ if (isPow2(absDivisorValue))
+ {
+ // We need to use the dividend node multiple times so its value needs to be
+ // computed once and stored in a temp variable.
+ CreateTemporary(&(divMod->gtOp.gtOp1));
+ dividend = divMod->gtGetOp1();
+
+ GenTreeStmt* curStmt = comp->compCurStmt->AsStmt();
+ unsigned curBBWeight = currBlock->getBBWeight(comp);
+ unsigned dividendLclNum = dividend->gtLclVar.gtLclNum;
+
+ GenTree* adjustment = comp->gtNewOperNode(
+ GT_RSH, type,
+ dividend,
+ comp->gtNewIconNode(type == TYP_INT ? 31 : 63));
+
+ if (absDivisorValue == 2)
+ {
+ // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1.
+ // We can get the same result by using GT_RSZ instead of GT_RSH.
+ adjustment->SetOper(GT_RSZ);
+ }
+ else
+ {
+ adjustment = comp->gtNewOperNode(
+ GT_AND, type,
+ adjustment,
+ comp->gtNewIconNode(absDivisorValue - 1, type));
+ }
+
+ GenTree* adjustedDividend = comp->gtNewOperNode(
+ GT_ADD, type,
+ adjustment,
+ comp->gtNewLclvNode(dividendLclNum, type));
+
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+
+ GenTree* newDivMod;
+
+ if (isDiv)
+ {
+ // perform the division by right shifting the adjusted dividend
+ divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue));
+
+ newDivMod = comp->gtNewOperNode(
+ GT_RSH, type,
+ adjustedDividend,
+ divisor);
+
+ if (divisorValue < 0)
+ {
+ // negate the result if the divisor is negative
+ newDivMod = comp->gtNewOperNode(
+ GT_NEG, type,
+ newDivMod);
+ }
+ }
+ else
+ {
+ // divisor % dividend = dividend - divisor x (dividend / divisor)
+ // divisor x (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor)
+ // which simply discards the low log2(divisor) bits, that's just dividend & ~(divisor - 1)
+ divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1));
+
+ newDivMod = comp->gtNewOperNode(
+ GT_SUB, type,
+ comp->gtNewLclvNode(dividendLclNum, type),
+ comp->gtNewOperNode(
+ GT_AND, type,
+ adjustedDividend,
+ divisor));
+
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+ }
+
+ // Remove the divisor and dividend nodes from the linear order,
+ // since we have reused them and will resequence the tree
+ comp->fgSnipNode(curStmt, divisor);
+ comp->fgSnipNode(curStmt, dividend);
+
+ // linearize and insert the new tree before the original divMod node
+ comp->gtSetEvalOrder(newDivMod);
+ comp->fgSetTreeSeq(newDivMod);
+ comp->fgInsertTreeInListBefore(newDivMod, divMod, curStmt);
+ comp->fgSnipNode(curStmt, divMod);
+
+ // the divMod that we've replaced could have been a call arg
+ comp->fgFixupIfCallArg(data->parentStack, divMod, newDivMod);
+
+ // replace the original divmod node with the new divmod tree
+ *ppTree = newDivMod;
+
+ return;
+ }
+ }
+}
//------------------------------------------------------------------------
// LowerInd: attempt to transform indirected expression into an addressing mode
comp->lvaPromoteLongVars();
#endif // !defined(_TARGET_64BIT_)
-#ifdef DEBUG
- unsigned stmtNum = 0;
-#endif
for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
{
GenTreePtr stmt;
continue;
}
#ifdef DEBUG
- ++stmtNum;
if (comp->verbose)
{
- // This is a useful location for a conditional breakpoint in Visual Studio (i.e. when stmtNum == 15)
- printf("Decomposing BB%02u, stmt %u\n", block->bbNum, stmtNum);
+ printf("Decomposing BB%02u, stmt id %u\n", block->bbNum, stmt->gtTreeID);
}
#endif
comp->compCurStmt = stmt;
}
#endif //!_TARGET_64BIT_
-#ifdef DEBUG
- stmtNum = 0;
-#endif
// Walk the statement trees in this basic block
for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
{
continue;
}
#ifdef DEBUG
- ++stmtNum;
- if (comp->verbose)
+ if (comp->verbose)
{
- // This is a useful location for a conditional breakpoint in Visual Studio (i.e. when stmtNum == 15)
- printf("Lowering BB%02u, stmt %u\n", block->bbNum, stmtNum);
+ printf("Lowering BB%02u, stmt id %u\n", block->bbNum, stmt->gtTreeID);
}
#endif
comp->compCurStmt = stmt;
void LowerInd(GenTreePtr* ppTree);
void LowerAddrMode(GenTreePtr* ppTree, GenTree* before, Compiler::fgWalkData* data, bool isIndir);
void LowerAdd(GenTreePtr* ppTree, Compiler::fgWalkData* data);
+ void LowerUnsignedDivOrMod(GenTree* tree);
+ void LowerSignedDivOrMod(GenTreePtr* ppTree, Compiler::fgWalkData* data);
// Remove the nodes that are no longer used after an addressing mode is constructed under a GT_IND
void LowerIndCleanupHelper(GenTreeAddrMode* addrMode, GenTreePtr tree);
static bool NodesAreEquivalentLeaves (GenTreePtr candidate, GenTreePtr storeInd);
GenTreePtr CreateLocalTempAsg (GenTreePtr rhs, unsigned refCount, GenTreePtr *ppLclVar = nullptr);
-
+ GenTreeStmt* CreateTemporary (GenTree** ppTree);
bool AreSourcesPossiblyModified (GenTree* use, GenTree* src1, GenTree *src2);
void ReplaceNode (GenTree** ppTreeLocation,
GenTree* replacementNode,
case GT_MULHI:
case GT_UDIV:
{
- // TODO-ARM64-CQ: Optimize a divide by power of 2 as we do for AMD64
info->srcCount = 2;
info->dstCount = 1;
}
op1 = tree->gtOp.gtOp1;
op2 = tree->gtOp.gtOp2;
- // See if we have an optimizable power of 2 which will be expanded
- // using instructions other than division.
- // (fgMorph has already done magic number transforms)
-
- if (op2->IsIntCnsFitsInI32())
- {
- bool isSigned = tree->OperGet() == GT_MOD || tree->OperGet() == GT_DIV;
- ssize_t amount = op2->gtIntConCommon.IconValue();
-
- if (isPow2(abs(amount)) && (isSigned || amount > 0)
- && amount != -1)
- {
- MakeSrcContained(tree, op2);
-
- if (isSigned)
- {
- // we are going to use CDQ instruction so want these RDX:RAX
- info->setDstCandidates(l, RBM_RAX);
- // If possible would like to have op1 in RAX to avoid a register move
- op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- }
- break;
- }
- }
-
// Amd64 Div/Idiv instruction:
// Dividend in RAX:RDX and computes
// Quotient in RAX, Remainder in RDX
return false;
// codegen will expand these
- if (isPow2(cons))
+ if (cons == SSIZE_T_MIN || isPow2(abs(cons)))
return false;
// someone else will fold this away, so don't make it complicated for them