From: Pent Ploompuu Date: Tue, 17 Aug 2021 18:38:19 +0000 (+0300) Subject: Fix ARM64 unsigned div by const perf regression (#57400) X-Git-Tag: accepted/tizen/unified/20220110.054933~312 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=13d992760f221bda9066c674dc2e13ee29a84613;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Fix ARM64 unsigned div by const perf regression (#57400) --- diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 2428c33..33e7ec5 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1853,8 +1853,16 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) // The arithmetic node must be sitting in a register (since it's not contained) assert(targetReg != REG_NA); + emitAttr attr = emitActualTypeSize(treeNode); - regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); + // UMULL/SMULL is twice as fast for 32*32->64bit MUL + if ((oper == GT_MUL) && (targetType == TYP_LONG) && genActualTypeIsInt(op1) && genActualTypeIsInt(op2)) + { + ins = treeNode->IsUnsigned() ? INS_umull : INS_smull; + attr = EA_4BYTE; + } + + regNumber r = emit->emitInsTernary(ins, attr, treeNode, op1, op2); assert(r == targetReg); genProduceReg(treeNode); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a328206..08cbc8b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5254,7 +5254,11 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) BlockRange().InsertBefore(divMod, preShiftBy, adjustedDividend); firstNode = preShiftBy; } - else if (type != TYP_I_IMPL) + else if (type != TYP_I_IMPL +#ifdef TARGET_ARM64 + && !simpleMul // On ARM64 we will use a 32x32->64 bit multiply as that's faster. +#endif + ) { adjustedDividend = comp->gtNewCastNode(TYP_I_IMPL, adjustedDividend, true, TYP_U_IMPL); BlockRange().InsertBefore(divMod, adjustedDividend); @@ -5269,6 +5273,14 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) #endif divisor->gtType = TYP_I_IMPL; + +#ifdef TARGET_ARM64 + if (simpleMul) + { + divisor->gtType = TYP_INT; + } +#endif + divisor->AsIntCon()->SetIconValue(magic); if (isDiv && !postShift && type == TYP_I_IMPL)