From d95bfea59ed7d19e9b1db096c9d332005989296b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 14 Jun 2021 12:53:57 -0700 Subject: [PATCH] Ensure Vector.Sum uses SSE3, rather than SSSE3, for floating-point (#54123) * Adding a JIT/SIMD test validating Vector.Sum * Ensure Vector.Sum uses SSE3, rather than SSSE3, for floating-point * Ensure we do ISA checks before popping values from the stack * Applying formatting patch --- src/coreclr/jit/simdashwintrinsic.cpp | 48 ++++++++++++++------- src/tests/JIT/SIMD/VectorSum.cs | 77 ++++++++++++++++++++++++++++++++++ src/tests/JIT/SIMD/VectorSum_r.csproj | 13 ++++++ src/tests/JIT/SIMD/VectorSum_ro.csproj | 13 ++++++ 4 files changed, 136 insertions(+), 15 deletions(-) create mode 100644 src/tests/JIT/SIMD/VectorSum.cs create mode 100644 src/tests/JIT/SIMD/VectorSum_r.csproj create mode 100644 src/tests/JIT/SIMD/VectorSum_ro.csproj diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 8e8b760..638353f 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -479,6 +479,27 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } break; } + + case NI_VectorT128_Sum: + { + // TODO-XArch-CQ: We could support this all the way down to SSE2 and that might be + // worthwhile so we can accelerate cases like byte/sbyte and long/ulong + + if (varTypeIsFloating(simdBaseType)) + { + if (!compOpportunisticallyDependsOn(InstructionSet_SSE3)) + { + // Floating-point types require SSE3.HorizontalAdd + return nullptr; + } + } + else if (!compOpportunisticallyDependsOn(InstructionSet_SSSE3)) + { + // Integral types require SSSE3.HorizontalAdd + return nullptr; + } + break; + } #endif // TARGET_XARCH default: @@ -721,25 +742,22 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } case NI_VectorT128_Sum: { - if (compOpportunisticallyDependsOn(InstructionSet_SSSE3)) - { - GenTree* tmp; - unsigned vectorLength = getSIMDVectorLength(simdSize, simdBaseType); - int haddCount = genLog2(vectorLength); - for (int i = 0; i < haddCount; i++) - { - op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Vector.Sum")); - op1 = gtNewSimdAsHWIntrinsicNode(simdType, op1, tmp, NI_SSSE3_HorizontalAdd, - simdBaseJitType, simdSize); - } + GenTree* tmp; + unsigned vectorLength = getSIMDVectorLength(simdSize, simdBaseType); + int haddCount = genLog2(vectorLength); - return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_Vector128_ToScalar, simdBaseJitType, - simdSize); + NamedIntrinsic horizontalAdd = + varTypeIsFloating(simdBaseType) ? NI_SSE3_HorizontalAdd : NI_SSSE3_HorizontalAdd; + + for (int i = 0; i < haddCount; i++) + { + op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.Sum")); + op1 = gtNewSimdAsHWIntrinsicNode(simdType, op1, tmp, horizontalAdd, simdBaseJitType, simdSize); } - return nullptr; + return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_Vector128_ToScalar, simdBaseJitType, simdSize); } case NI_VectorT256_Sum: { diff --git a/src/tests/JIT/SIMD/VectorSum.cs b/src/tests/JIT/SIMD/VectorSum.cs new file mode 100644 index 0000000..0a6bf67 --- /dev/null +++ b/src/tests/JIT/SIMD/VectorSum.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +using System; +using System.Numerics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; + +internal partial class VectorTest +{ + private const int Pass = 100; + private const int Fail = -1; + + private class VectorSumTest where T : struct, IComparable, IEquatable + { + public static int VectorSum(T a, T b) + { + Vector A = new Vector(a); + T B = Vector.Sum(A); + + if (!(CheckValue(B, b))) + { + return Fail; + } + return Pass; + } + } + + private static int Main() + { + int returnVal = Pass; + + if (VectorSumTest.VectorSum(1, (float)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(1, (double)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(1, (int)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(1, (long)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(1, (ushort)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(1, (byte)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(-1, (short)(-Vector.Count)) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(-1, (sbyte)(-Vector.Count)) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(0x41000000u, 0x41000000u * (uint)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(0x4100000000000000ul, 0x4100000000000000ul * (uint)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(1, (nint)Vector.Count) != Pass) returnVal = Fail; + if (VectorSumTest.VectorSum(0x41000000u, 0x41000000u * (nuint)(uint)Vector.Count) != Pass) returnVal = Fail; + + JitLog jitLog = new JitLog(); + + if (Sse3.IsSupported || AdvSimd.IsSupported) + { + if (!jitLog.Check("Sum", "Single")) returnVal = Fail; + if (!jitLog.Check("Sum", "Double")) returnVal = Fail; + } + + if (Ssse3.IsSupported || AdvSimd.IsSupported) + { + if (!jitLog.Check("Sum", "Int16")) returnVal = Fail; + if (!jitLog.Check("Sum", "Int32")) returnVal = Fail; + if (!jitLog.Check("Sum", "UInt16")) returnVal = Fail; + if (!jitLog.Check("Sum", "UInt32")) returnVal = Fail; + } + + if (AdvSimd.IsSupported) + { + if (!jitLog.Check("Sum", "Byte")) returnVal = Fail; + if (!jitLog.Check("Sum", "Int64")) returnVal = Fail; + if (!jitLog.Check("Sum", "IntPtr")) returnVal = Fail; + if (!jitLog.Check("Sum", "SByte")) returnVal = Fail; + if (!jitLog.Check("Sum", "UInt64")) returnVal = Fail; + if (!jitLog.Check("Sum", "UIntPtr")) returnVal = Fail; + } + + jitLog.Dispose(); + + return returnVal; + } +} diff --git a/src/tests/JIT/SIMD/VectorSum_r.csproj b/src/tests/JIT/SIMD/VectorSum_r.csproj new file mode 100644 index 0000000..56a3fad --- /dev/null +++ b/src/tests/JIT/SIMD/VectorSum_r.csproj @@ -0,0 +1,13 @@ + + + Exe + + + None + + + + + + + diff --git a/src/tests/JIT/SIMD/VectorSum_ro.csproj b/src/tests/JIT/SIMD/VectorSum_ro.csproj new file mode 100644 index 0000000..3a4794c --- /dev/null +++ b/src/tests/JIT/SIMD/VectorSum_ro.csproj @@ -0,0 +1,13 @@ + + + Exe + + + None + True + + + + + + -- 2.7.4