// The arithmetic node must be sitting in a register (since it's not contained)
assert(targetReg != REG_NA);
+ emitAttr attr = emitActualTypeSize(treeNode);
- regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2);
+ // UMULL/SMULL are twice as fast for a 32*32->64 bit multiply
+ if ((oper == GT_MUL) && (targetType == TYP_LONG) && genActualTypeIsInt(op1) && genActualTypeIsInt(op2))
+ {
+ ins = treeNode->IsUnsigned() ? INS_umull : INS_smull;
+ attr = EA_4BYTE;
+ }
+
+ regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2);
assert(r == targetReg);
genProduceReg(treeNode);
BlockRange().InsertBefore(divMod, preShiftBy, adjustedDividend);
firstNode = preShiftBy;
}
- else if (type != TYP_I_IMPL)
+ else if (type != TYP_I_IMPL
+#ifdef TARGET_ARM64
+ && !simpleMul // On ARM64 we will use a 32x32->64 bit multiply as that's faster.
+#endif
+ )
{
adjustedDividend = comp->gtNewCastNode(TYP_I_IMPL, adjustedDividend, true, TYP_U_IMPL);
BlockRange().InsertBefore(divMod, adjustedDividend);
#endif
divisor->gtType = TYP_I_IMPL;
+
+#ifdef TARGET_ARM64
+ if (simpleMul)
+ {
+ divisor->gtType = TYP_INT;
+ }
+#endif
+
divisor->AsIntCon()->SetIconValue(magic);
if (isDiv && !postShift && type == TYP_I_IMPL)