// Generate code to get the high N bits of a N*N=2N bit multiplication result
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
- assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ if (treeNode->OperGet() == GT_MULHI)
+ {
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ }
assert(!treeNode->gtOverflowEx());
regNumber targetReg = treeNode->gtRegNum;
GenTree* rmOp = op2;
// Set rmOp to the contained memory operand (if any)
- //
if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
{
regOp = op2;
inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
}
- emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+ instruction ins;
+ if ((treeNode->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ ins = INS_imulEAX;
+ }
+ else
+ {
+ ins = INS_mulEAX;
+ }
+ emit->emitInsBinary(ins, size, treeNode, rmOp);
// Move the result to the desired register, if necessary
- if (targetReg != REG_RDX)
+ if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX)
{
inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
}
assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
#else // !defined(_TARGET_64BIT_)
assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
- oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
+ oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_LONG || oper == GT_DIV_HI || oper == GT_MOD_HI ||
oper == GT_ADD || oper == GT_SUB);
#endif // !defined(_TARGET_64BIT_)
break;
case GT_MULHI:
+#ifdef _TARGET_X86_
+ case GT_MUL_LONG:
+#endif
genCodeForMulHi(treeNode->AsOp());
genProduceReg(treeNode);
break;
assert(varDsc->TypeGet() == TYP_LONG);
assert(!varDsc->lvPromoted);
GenTreePtr op1 = treeNode->gtOp.gtOp1;
- noway_assert(op1->OperGet() == GT_LONG);
+ noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG);
genConsumeRegs(op1);
- // Definitions of register candidates will have been lowered to 2 int lclVars.
- assert(!treeNode->InReg());
+ if (op1->OperGet() == GT_LONG)
+ {
+ // Definitions of register candidates will have been lowered to 2 int lclVars.
+ assert(!treeNode->InReg());
+
+ GenTreePtr loVal = op1->gtGetOp1();
+ GenTreePtr hiVal = op1->gtGetOp2();
+ // NYI: Contained immediates.
+ NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
+ emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
+ emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+ }
+ else if (op1->OperGet() == GT_MUL_LONG)
+ {
+ assert((op1->gtFlags & GTF_MUL_64RSLT) != 0);
- GenTreePtr loVal = op1->gtGetOp1();
- GenTreePtr hiVal = op1->gtGetOp2();
- // NYI: Contained immediates.
- NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+ // Stack store
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum, genTypeSize(TYP_INT));
+ }
}
#endif // !defined(_TARGET_64BIT_)
break;
case GT_MUL:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
if (tree->gtFlags & GTF_MUL_64RSLT)
{
break;
case GT_MUL:
- NYI("Arithmetic binary operators on TYP_LONG - GT_MUL");
+ nextNode = DecomposeMul(use);
break;
case GT_DIV:
GenTree* tree = use.Def();
GenTree* rhs = tree->gtGetOp1();
- if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL))
+ if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL) ||
+ ((rhs->OperGet() == GT_MUL_LONG) && (rhs->gtFlags & GTF_MUL_64RSLT) != 0))
{
// GT_CALLs are not decomposed, so will not be converted to GT_LONG
// GT_STORE_LCL_VAR = GT_CALL are handled in genMultiRegCallStoreToLocal
+ // GT_MULs are not decomposed, so will not be converted to GT_LONG
return tree->gtNext;
}
assert(use.Def()->OperGet() == GT_CALL);
// We only need to force var = call() if the call's result is used.
- if (use.IsDummyUse())
- return use.Def()->gtNext;
-
- GenTree* user = use.User();
- if (user->OperGet() == GT_STORE_LCL_VAR)
- {
- // If parent is already a STORE_LCL_VAR, we can skip it if
- // it is already marked as lvIsMultiRegRet.
- unsigned varNum = user->AsLclVarCommon()->gtLclNum;
- if (m_compiler->lvaTable[varNum].lvIsMultiRegRet)
- {
- return use.Def()->gtNext;
- }
- else if (!m_compiler->lvaTable[varNum].lvPromoted)
- {
- // If var wasn't promoted, we can just set lvIsMultiRegRet.
- m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
- return use.Def()->gtNext;
- }
- }
-
- GenTree* originalNode = use.Def();
-
- // Otherwise, we need to force var = call()
- unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight);
- m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
-
- // Decompose the new LclVar use
- return DecomposeLclVar(use);
+ return StoreNodeToVar(use);
}
//------------------------------------------------------------------------
}
//------------------------------------------------------------------------
+// DecomposeMul: Decompose GT_MUL. The only GT_MULs that make it to decompose are
+// those with the GTF_MUL_64RSLT flag set. These muls result in a mul instruction that
+// returns its result in two registers like GT_CALLs do. Additionally, these muls are
+// guaranteed to be in the form long = (long)int * (long)int. Therefore, to decompose
+// these nodes, we convert them into GT_MUL_LONGs, undo the cast from int to long by
+// stripping out the hi ops (keeping the lo ops), and force them into the form var = mul,
+// as we do for
+// GT_CALLs. In codegen, we then produce a mul instruction that produces the result
+// in edx:eax, and store those registers on the stack in genStoreLongLclVar.
+//
+// All other GT_MULs have been converted to helper calls in morph.cpp
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeMul(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ // Only 64-bit-result muls reach decomposition; everything else was morphed
+ // into a helper call earlier.
+ assert(oper == GT_MUL);
+ assert((tree->gtFlags & GTF_MUL_64RSLT) != 0);
+
+ // Both operands are GT_LONG pairs: (lo, hi) halves of the int->long casts.
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ GenTree* loOp1 = op1->gtGetOp1();
+ GenTree* hiOp1 = op1->gtGetOp2();
+ GenTree* loOp2 = op2->gtGetOp1();
+ GenTree* hiOp2 = op2->gtGetOp2();
+
+ // Unlink the hi halves and the GT_LONG wrappers from the LIR range; the lo
+ // halves stay in place and become the mul's direct operands below.
+ // NOTE(review): this assumes hiOp1/hiOp2 are side-effect-free (they are the
+ // extension halves produced by decomposing the casts) — confirm decompose
+ // guarantees this for all inputs.
+ Range().Remove(hiOp1);
+ Range().Remove(hiOp2);
+ Range().Remove(op1);
+ Range().Remove(op2);
+
+ // Get rid of the hi ops. We don't need them.
+ tree->gtOp.gtOp1 = loOp1;
+ tree->gtOp.gtOp2 = loOp2;
+ tree->gtOper = GT_MUL_LONG;
+
+ // Force the result into a lclVar (var = mul_long), mirroring multi-reg calls.
+ return StoreNodeToVar(use);
+}
+
+//------------------------------------------------------------------------
+// StoreNodeToVar: Check if the user is a STORE_LCL_VAR, and if it isn't,
+// store the node to a var. Then decompose the new LclVar.
+//
+// Used for nodes that produce their 64-bit result in a register pair
+// (GT_CALL, GT_MUL_LONG) and therefore must be spilled through a
+// multi-reg-return lclVar rather than decomposed into a GT_LONG pair.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::StoreNodeToVar(LIR::Use& use)
+{
+ // An unused value needs no store; just move on to the next node.
+ if (use.IsDummyUse())
+ return use.Def()->gtNext;
+
+ GenTree* tree = use.Def();
+ GenTree* user = use.User();
+
+ if (user->OperGet() == GT_STORE_LCL_VAR)
+ {
+ // If parent is already a STORE_LCL_VAR, we can skip it if
+ // it is already marked as lvIsMultiRegRet.
+ unsigned varNum = user->AsLclVarCommon()->gtLclNum;
+ if (m_compiler->lvaTable[varNum].lvIsMultiRegRet)
+ {
+ return tree->gtNext;
+ }
+ else if (!m_compiler->lvaTable[varNum].lvPromoted)
+ {
+ // If var wasn't promoted, we can just set lvIsMultiRegRet.
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
+ return tree->gtNext;
+ }
+ }
+
+ // Otherwise, we need to force var = node() (e.g. var = call, var = mul_long)
+ // by introducing a new multi-reg-return lclVar for the def.
+ unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
+
+ // Decompose the new LclVar use
+ return DecomposeLclVar(use);
+}
+//------------------------------------------------------------------------
// GetHiOper: Convert arithmetic operator to "high half" operator of decomposed node.
//
// Arguments:
case GT_SUB:
return GT_SUB_HI;
break;
- case GT_MUL:
- return GT_MUL_HI;
- break;
case GT_DIV:
return GT_DIV_HI;
break;
GenTree* DecomposeNeg(LIR::Use& use);
GenTree* DecomposeArith(LIR::Use& use);
GenTree* DecomposeShift(LIR::Use& use);
+ GenTree* DecomposeMul(LIR::Use& use);
// Helper functions
GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult);
+ GenTree* StoreNodeToVar(LIR::Use& use);
static genTreeOps GetHiOper(genTreeOps oper);
static genTreeOps GetLoOper(genTreeOps oper);
goto DASH;
case GT_MUL:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
if (tree->gtFlags & GTF_MUL_64RSLT)
{
printf("L");
{
case GT_ADD_HI:
case GT_SUB_HI:
- case GT_MUL_HI:
case GT_DIV_HI:
case GT_MOD_HI:
return true;
GTNODE(ROL , "rol" ,0,GTK_BINOP)
GTNODE(ROR , "ror" ,0,GTK_BINOP)
GTNODE(MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply)
+ // GT_MULHI is used in division by a constant (fgMorphDivByConst). We turn
+ // the div into a MULHI + some adjustments. In codegen, we only use the
+ // results of the high register, and we drop the low results.
GTNODE(ASG , "=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
GTNODE(ASG_ADD , "+=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
// nodes such as calls, returns and stores of long lclVars.
GTNODE(LONG , "gt_long" ,0,GTK_BINOP)
-// The following are nodes representing the upper half of a 64-bit operation
-// that requires a carry/borrow. However, they are all named GT_XXX_HI for
-// consistency.
+// The following are nodes representing x86 specific long operators, including
+// high operators of a 64-bit operations that requires a carry/borrow, which are
+// named GT_XXX_HI for consistency, low operators of 64-bit operations that need
+// to not be modified in phases post-decompose, and operators that return 64-bit
+// results in one instruction.
GTNODE(ADD_LO , "+Lo" ,1,GTK_BINOP)
GTNODE(ADD_HI , "+Hi" ,1,GTK_BINOP)
GTNODE(SUB_LO , "-Lo" ,0,GTK_BINOP)
GTNODE(SUB_HI , "-Hi" ,0,GTK_BINOP)
-GTNODE(MUL_HI , "*Hi" ,1,GTK_BINOP)
GTNODE(DIV_HI , "/Hi" ,0,GTK_BINOP)
GTNODE(MOD_HI , "%Hi" ,0,GTK_BINOP)
+GTNODE(MUL_LONG , "*long" ,1,GTK_BINOP) // A mul that returns the 2N bit result of an NxN multiply. This op
+ // is used for x86 multiplies that take two ints and return a long
+ // result. All other multiplies with long results are morphed into
+ // helper calls. It is similar to GT_MULHI, the difference being that
+ // GT_MULHI drops the lo part of the result, whereas GT_MUL_LONG keeps
+ // both parts of the result.
#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
#ifdef FEATURE_SIMD
case GT_MUL:
case GT_MULHI:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
SetMulOpCounts(tree);
break;
*/
void Lowering::SetMulOpCounts(GenTreePtr tree)
{
+#if defined(_TARGET_X86_)
+ assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI || tree->OperGet() == GT_MUL_LONG);
+#else
assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI);
-
+#endif
TreeNodeInfo* info = &(tree->gtLsraInfo);
info->srcCount = 2;
// three-op form: reg = r/m * imm
// This special widening 32x32->64 MUL is not used on x64
- assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
+#if defined(_TARGET_X86_)
+ if(tree->OperGet() != GT_MUL_LONG)
+#endif
+ {
+ assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
+ }
// Multiply should never be using small types
assert(!varTypeIsSmall(tree->TypeGet()));
info->setDstCandidates(m_lsra, RBM_RAX);
hasImpliedFirstOperand = true;
}
- else if (tree->gtOper == GT_MULHI)
+ else if (tree->gtOper == GT_MULHI
+#if defined(_TARGET_X86_)
+ || tree->OperGet() == GT_MUL_LONG
+#endif
+ )
{
// have to use the encoding:RDX:RAX = RAX * rm
info->setDstCandidates(m_lsra, RBM_RAX);
break;
case GT_MULHI:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
killMask = RBM_RAX | RBM_RDX;
break;