From 7345109fc3f4726611cd02ec3e522f4bc42f3a16 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 20 Feb 2022 02:51:48 +0300 Subject: [PATCH] Faster Math.Max/Min on arm64 (#65584) Co-authored-by: Adeel Mujahid <3840695+am11@users.noreply.github.com> Co-authored-by: Tanner Gooding --- src/coreclr/jit/codegenarmarch.cpp | 14 +- src/coreclr/jit/fgdiagnostic.cpp | 2 +- src/coreclr/jit/gentree.cpp | 43 ++++++ src/coreclr/jit/gentree.h | 17 +-- src/coreclr/jit/importer.cpp | 19 +++ src/coreclr/jit/lsraarm64.cpp | 47 ++++-- src/coreclr/jit/namedintrinsiclist.h | 2 + src/coreclr/jit/utils.cpp | 162 +++++++++++++++++++++ src/coreclr/jit/utils.h | 16 ++ src/coreclr/jit/valuenum.cpp | 40 +++++ src/coreclr/jit/valuenumfuncs.h | 2 + .../System.Private.CoreLib/src/System/Math.cs | 4 + 12 files changed, 334 insertions(+), 34 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index b6d744a..6c0ec3c 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -669,7 +669,7 @@ void CodeGen::genIntrinsic(GenTree* treeNode) assert(varTypeIsFloating(srcNode)); assert(srcNode->TypeGet() == treeNode->TypeGet()); - // Right now only Abs/Ceiling/Floor/Truncate/Round/Sqrt are treated as math intrinsics. + // Only a subset of functions are treated as math intrinsics. // switch (treeNode->AsIntrinsic()->gtIntrinsicName) { @@ -698,6 +698,18 @@ void CodeGen::genIntrinsic(GenTree* treeNode) genConsumeOperands(treeNode->AsOp()); GetEmitter()->emitInsBinary(INS_frintn, emitActualTypeSize(treeNode), treeNode, srcNode); break; + + case NI_System_Math_Max: + genConsumeOperands(treeNode->AsOp()); + GetEmitter()->emitIns_R_R_R(INS_fmax, emitActualTypeSize(treeNode), treeNode->GetRegNum(), + treeNode->gtGetOp1()->GetRegNum(), treeNode->gtGetOp2()->GetRegNum()); + break; + + case NI_System_Math_Min: + genConsumeOperands(treeNode->AsOp()); + GetEmitter()->emitIns_R_R_R(INS_fmin, emitActualTypeSize(treeNode), treeNode->GetRegNum(), + treeNode->gtGetOp1()->GetRegNum(), treeNode->gtGetOp2()->GetRegNum()); + break; #endif // TARGET_ARM64 case NI_System_Math_Sqrt: diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 2df6d00..13fd138 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -2954,7 +2954,7 @@ void Compiler::fgDebugCheckFlags(GenTree* tree) // We reuse GTF_REVERSE_OPS as GTF_VAR_ARR_INDEX for LCL_VAR nodes. if (((tree->gtFlags & GTF_REVERSE_OPS) != 0) && !tree->OperIs(GT_LCL_VAR)) { - assert(tree->OperSupportsReverseOps()); + assert(tree->OperSupportsReverseOpEvalOrder(this)); } GenTree* op1 = tree->OperIsSimple() ? tree->gtGetOp1() : nullptr; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a47a56a..afa6c78 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3855,6 +3855,8 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case NI_System_Math_Log: case NI_System_Math_Log2: case NI_System_Math_Log10: + case NI_System_Math_Max: + case NI_System_Math_Min: case NI_System_Math_Pow: case NI_System_Math_Round: case NI_System_Math_Sin: @@ -4372,6 +4374,12 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // register requirement. level += 2; break; + + case NI_System_Math_Max: + case NI_System_Math_Min: + level++; + break; + default: assert(!"Unknown binary GT_INTRINSIC operator"); break; @@ -4805,6 +4813,35 @@ DONE: #pragma warning(pop) #endif +#ifdef DEBUG +bool GenTree::OperSupportsReverseOpEvalOrder(Compiler* comp) const +{ + if (OperIsBinary()) + { + if ((AsOp()->gtGetOp1() == nullptr) || (AsOp()->gtGetOp2() == nullptr)) + { + return false; + } + if (OperIs(GT_COMMA, GT_BOUNDS_CHECK)) + { + return false; + } + if (OperIs(GT_INTRINSIC)) + { + return !comp->IsIntrinsicImplementedByUserCall(AsIntrinsic()->gtIntrinsicName); + } + return true; + } +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + if (OperIsMultiOp()) + { + return AsMultiOp()->GetOperandCount() == 2; + } +#endif // FEATURE_SIMD || FEATURE_HW_INTRINSICS + return false; +} +#endif // DEBUG + /***************************************************************************** * * If the given tree is an integer constant that can be used @@ -10893,6 +10930,12 @@ void Compiler::gtDispTree(GenTree* tree, case NI_System_Math_Log10: printf(" log10"); break; + case NI_System_Math_Max: + printf(" max"); + break; + case NI_System_Math_Min: + printf(" min"); + break; case NI_System_Math_Pow: printf(" pow"); break; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index fb340cd..9ec1958 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1704,7 +1704,7 @@ public: return (DebugOperKind() & DBK_NOTLIR) == 0; } - bool OperSupportsReverseOps() const; + bool OperSupportsReverseOpEvalOrder(Compiler* comp) const; static bool RequiresNonNullOp2(genTreeOps oper); bool IsValidCallArgument(); #endif // DEBUG @@ -7870,22 +7870,7 @@ inline GenTree* GenTree::gtGetOp1() const } } -inline bool GenTree::OperSupportsReverseOps() const -{ - if (OperIsBinary() && !OperIs(GT_COMMA, GT_INTRINSIC, GT_BOUNDS_CHECK)) - { - return (AsOp()->gtGetOp1() != nullptr) && (AsOp()->gtGetOp2() != nullptr); - } -#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) - if (OperIsMultiOp()) - { - return AsMultiOp()->GetOperandCount() == 2; - } -#endif // FEATURE_SIMD || FEATURE_HW_INTRINSICS - return false; -} #endif // DEBUG - inline GenTree* GenTree::gtGetOp2() const { assert(OperIsBinary()); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 01942a2..3e84db2 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -4492,6 +4492,13 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_Math_Log: case NI_System_Math_Log2: case NI_System_Math_Log10: +#ifdef TARGET_ARM64 + // ARM64 has fmax/fmin which are IEEE754:2019 minimum/maximum compatible + // TODO-XARCH-CQ: Enable this for XARCH when one of the arguments is a constant + // so we can then emit maxss/minss and avoid NaN/-0.0 handling + case NI_System_Math_Max: + case NI_System_Math_Min: +#endif case NI_System_Math_Pow: case NI_System_Math_Round: case NI_System_Math_Sin: @@ -5118,6 +5125,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_Math_Log10; } + else if (strcmp(methodName, "Max") == 0) + { + result = NI_System_Math_Max; + } + else if (strcmp(methodName, "Min") == 0) + { + result = NI_System_Math_Min; + } else if (strcmp(methodName, "Pow") == 0) { result = NI_System_Math_Pow; @@ -20549,6 +20564,8 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_Truncate: case NI_System_Math_Round: case NI_System_Math_Sqrt: + case NI_System_Math_Max: + case NI_System_Math_Min: return true; case NI_System_Math_FusedMultiplyAdd: @@ -20613,6 +20630,8 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_Log: case NI_System_Math_Log2: case NI_System_Math_Log10: + case NI_System_Math_Max: + case NI_System_Math_Min: case NI_System_Math_Pow: case NI_System_Math_Round: case NI_System_Math_Sin: diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9a23248..091068a 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -326,22 +326,37 @@ int LinearScan::BuildNode(GenTree* tree) case GT_INTRINSIC: { - noway_assert((tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Ceiling) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Floor) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Truncate) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) || - (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Sqrt)); - - // Both operand and its result must be of the same floating point type. - GenTree* op1 = tree->gtGetOp1(); - assert(varTypeIsFloating(op1)); - assert(op1->TypeGet() == tree->TypeGet()); - - BuildUse(op1); - srcCount = 1; - assert(dstCount == 1); - BuildDef(tree); + switch (tree->AsIntrinsic()->gtIntrinsicName) + { + case NI_System_Math_Max: + case NI_System_Math_Min: + assert(varTypeIsFloating(tree->gtGetOp1())); + assert(varTypeIsFloating(tree->gtGetOp2())); + assert(tree->gtGetOp1()->TypeIs(tree->TypeGet())); + + srcCount = BuildBinaryUses(tree->AsOp()); + assert(dstCount == 1); + BuildDef(tree); + break; + + case NI_System_Math_Abs: + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: + case NI_System_Math_Truncate: + case NI_System_Math_Round: + case NI_System_Math_Sqrt: + assert(varTypeIsFloating(tree->gtGetOp1())); + assert(tree->gtGetOp1()->TypeIs(tree->TypeGet())); + + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 1); + BuildDef(tree); + break; + + default: + unreached(); + } } break; diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index d864c3f..c830a26 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -36,6 +36,8 @@ enum NamedIntrinsic : unsigned short NI_System_Math_Log, NI_System_Math_Log2, NI_System_Math_Log10, + NI_System_Math_Max, + NI_System_Math_Min, NI_System_Math_Pow, NI_System_Math_Round, NI_System_Math_Sin, diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 1629b10..529c653 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -2211,6 +2211,168 @@ bool FloatingPointUtils::hasPreciseReciprocal(float x) return mantissa == 0 && exponent != 0 && exponent != 127; } +//------------------------------------------------------------------------ +// isNegative: Determines whether the specified value is negative +// +// Arguments: +// val - value to check +// +// Return Value: +// True if val is negative +// + +bool FloatingPointUtils::isNegative(float val) +{ + return *reinterpret_cast(&val) < 0; +} + +//------------------------------------------------------------------------ +// isNegative: Determines whether the specified value is negative +// +// Arguments: +// val - value to check +// +// Return Value: +// True if val is negative +// + +bool FloatingPointUtils::isNegative(double val) +{ + return *reinterpret_cast(&val) < 0; +} + +//------------------------------------------------------------------------ +// isNaN: Determines whether the specified value is NaN +// +// Arguments: +// val - value to check for NaN +// +// Return Value: +// True if val is NaN +// + +bool FloatingPointUtils::isNaN(float val) +{ + UINT32 bits = *reinterpret_cast(&val); + return (bits & 0x7FFFFFFFU) > 0x7F800000U; +} + +//------------------------------------------------------------------------ +// isNaN: Determines whether the specified value is NaN +// +// Arguments: +// val - value to check for NaN +// +// Return Value: +// True if val is NaN +// + +bool FloatingPointUtils::isNaN(double val) +{ + UINT64 bits = *reinterpret_cast(&val); + return (bits & 0x7FFFFFFFFFFFFFFFULL) > 0x7FF0000000000000ULL; +} + +//------------------------------------------------------------------------ +// maximum: This matches the IEEE 754:2019 `maximum` function +// It propagates NaN inputs back to the caller and +// otherwise returns the larger of the inputs. It +// treats +0 as larger than -0 as per the specification. +// +// Arguments: +// val1 - left operand +// val2 - right operand +// +// Return Value: +// Either val1 or val2 +// + +double FloatingPointUtils::maximum(double val1, double val2) +{ + if (val1 != val2) + { + if (!isNaN(val1)) + { + return val2 < val1 ? val1 : val2; + } + return val1; + } + return isNegative(val2) ? val1 : val2; +} + +//------------------------------------------------------------------------ +// maximum: This matches the IEEE 754:2019 `maximum` function +// It propagates NaN inputs back to the caller and +// otherwise returns the larger of the inputs. It +// treats +0 as larger than -0 as per the specification. +// +// Arguments: +// val1 - left operand +// val2 - right operand +// +// Return Value: +// Either val1 or val2 +// + +float FloatingPointUtils::maximum(float val1, float val2) +{ + if (val1 != val2) + { + if (!isNaN(val1)) + { + return val2 < val1 ? val1 : val2; + } + return val1; + } + return isNegative(val2) ? val1 : val2; +} + +//------------------------------------------------------------------------ +// minimum: This matches the IEEE 754:2019 `minimum` function +// It propagates NaN inputs back to the caller and +// otherwise returns the larger of the inputs. It +// treats +0 as larger than -0 as per the specification. +// +// Arguments: +// val1 - left operand +// val2 - right operand +// +// Return Value: +// Either val1 or val2 +// + +double FloatingPointUtils::minimum(double val1, double val2) +{ + if (val1 != val2 && !isNaN(val1)) + { + return val1 < val2 ? val1 : val2; + } + return isNegative(val1) ? val1 : val2; +} + +//------------------------------------------------------------------------ +// minimum: This matches the IEEE 754:2019 `minimum` function +// It propagates NaN inputs back to the caller and +// otherwise returns the larger of the inputs. It +// treats +0 as larger than -0 as per the specification. +// +// Arguments: +// val1 - left operand +// val2 - right operand +// +// Return Value: +// Either val1 or val2 +// + +float FloatingPointUtils::minimum(float val1, float val2) +{ + if (val1 != val2 && !isNaN(val1)) + { + return val1 < val2 ? val1 : val2; + } + return isNegative(val1) ? val1 : val2; +} + namespace MagicDivide { template diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index f0d29d0..39985f1 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -690,6 +690,22 @@ public: static double infinite_double(); static float infinite_float(); + + static bool isNegative(float val); + + static bool isNegative(double val); + + static bool isNaN(float val); + + static bool isNaN(double val); + + static double maximum(double val1, double val2); + + static float maximum(float val1, float val2); + + static double minimum(double val1, double val2); + + static float minimum(float val1, float val2); }; // The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 7ea3770..27e37dc 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -5726,6 +5726,22 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF break; } + case NI_System_Math_Max: + { + assert(typ == TypeOfVN(arg1VN)); + double arg1Val = GetConstantDouble(arg1VN); + res = FloatingPointUtils::maximum(arg0Val, arg1Val); + break; + } + + case NI_System_Math_Min: + { + assert(typ == TypeOfVN(arg1VN)); + double arg1Val = GetConstantDouble(arg1VN); + res = FloatingPointUtils::minimum(arg0Val, arg1Val); + break; + } + default: // the above are the only binary math intrinsics at the time of this writing. unreached(); @@ -5759,6 +5775,22 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF break; } + case NI_System_Math_Max: + { + assert(typ == TypeOfVN(arg1VN)); + float arg1Val = GetConstantSingle(arg1VN); + res = FloatingPointUtils::maximum(arg0Val, arg1Val); + break; + } + + case NI_System_Math_Min: + { + assert(typ == TypeOfVN(arg1VN)); + float arg1Val = GetConstantSingle(arg1VN); + res = FloatingPointUtils::minimum(arg0Val, arg1Val); + break; + } + case NI_System_Math_Pow: { assert(typ == TypeOfVN(arg1VN)); @@ -5789,6 +5821,14 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathF vnf = VNF_FMod; break; + case NI_System_Math_Max: + vnf = VNF_Max; + break; + + case NI_System_Math_Min: + vnf = VNF_Min; + break; + case NI_System_Math_Pow: vnf = VNF_Pow; break; diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index 47135a0..74e8b90 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -92,6 +92,8 @@ ValueNumFuncDef(ILogB, 1, false, false, false) ValueNumFuncDef(Log, 1, false, false, false) ValueNumFuncDef(Log2, 1, false, false, false) ValueNumFuncDef(Log10, 1, false, false, false) +ValueNumFuncDef(Max, 2, false, false, false) +ValueNumFuncDef(Min, 2, false, false, false) ValueNumFuncDef(Pow, 2, false, false, false) ValueNumFuncDef(RoundDouble, 1, false, false, false) ValueNumFuncDef(RoundInt32, 1, false, false, false) diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index cdf6f3c..52f95fa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -881,6 +881,7 @@ namespace System return decimal.Max(val1, val2); } + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double Max(double val1, double val2) { @@ -938,6 +939,7 @@ namespace System return (val1 >= val2) ? val1 : val2; } + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float Max(float val1, float val2) { @@ -1028,6 +1030,7 @@ namespace System return decimal.Min(val1, val2); } + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double Min(double val1, double val2) { @@ -1080,6 +1083,7 @@ namespace System return (val1 <= val2) ? val1 : val2; } + [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float Min(float val1, float val2) { -- 2.7.4