Fix ARM64 unsigned div by const perf regression (#57400)
author Pent Ploompuu <kaalikas@gmail.com>
Tue, 17 Aug 2021 18:38:19 +0000 (21:38 +0300)
committer GitHub <noreply@github.com>
Tue, 17 Aug 2021 18:38:19 +0000 (11:38 -0700)
src/coreclr/jit/codegenarm64.cpp
src/coreclr/jit/lower.cpp

index 2428c33..33e7ec5 100644 (file)
@@ -1853,8 +1853,16 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
 
     // The arithmetic node must be sitting in a register (since it's not contained)
     assert(targetReg != REG_NA);
+    emitAttr attr = emitActualTypeSize(treeNode);
 
-    regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2);
+    // UMULL/SMULL is twice as fast for 32*32->64bit MUL
+    if ((oper == GT_MUL) && (targetType == TYP_LONG) && genActualTypeIsInt(op1) && genActualTypeIsInt(op2))
+    {
+        ins  = treeNode->IsUnsigned() ? INS_umull : INS_smull;
+        attr = EA_4BYTE;
+    }
+
+    regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2);
     assert(r == targetReg);
 
     genProduceReg(treeNode);
index a328206..08cbc8b 100644 (file)
@@ -5254,7 +5254,11 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
             BlockRange().InsertBefore(divMod, preShiftBy, adjustedDividend);
             firstNode = preShiftBy;
         }
-        else if (type != TYP_I_IMPL)
+        else if (type != TYP_I_IMPL
+#ifdef TARGET_ARM64
+                 && !simpleMul // On ARM64 we will use a 32x32->64 bit multiply as that's faster.
+#endif
+                 )
         {
             adjustedDividend = comp->gtNewCastNode(TYP_I_IMPL, adjustedDividend, true, TYP_U_IMPL);
             BlockRange().InsertBefore(divMod, adjustedDividend);
@@ -5269,6 +5273,14 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
 #endif
 
         divisor->gtType = TYP_I_IMPL;
+
+#ifdef TARGET_ARM64
+        if (simpleMul)
+        {
+            divisor->gtType = TYP_INT;
+        }
+#endif
+
         divisor->AsIntCon()->SetIconValue(magic);
 
         if (isDiv && !postShift && type == TYP_I_IMPL)