Morph `Vector.Create(0)` to `Vector.Zero` (#63821)
authorWill Smith <lol.tihan@gmail.com>
Thu, 17 Feb 2022 07:53:58 +0000 (23:53 -0800)
committerGitHub <noreply@github.com>
Thu, 17 Feb 2022 07:53:58 +0000 (23:53 -0800)
* Initial work

* Added a comma to display

* Cleanup

* Fixing build

* More cleanup

* Update comment

* Update comment

* Added CompareEqual Vector64/128 with Zero tests

* Do not contain op1 for now

* Wrong intrinsic id used

* Removing generated tests

* Removing generated tests

* Added CompareEqual tests

* Supporting containment for first operand

* Fix test build

* Passing correct register

* Check IsVectorZero before not allocing a register

* Update comment

* Fixing test

* Minor format change

* Fixed formatting

* Renamed test

* Adding AdvSimd_Arm64 tests:

* Adding support for rest of 'cmeq' and 'fcmeq' instructions

* Removing github csproj

* Minor test fix

* Fixed tests

* Fix print

* Minor format change

* Fixing test

* Initial commit for Vector.Create to Vector.Zero normalization

* Added some emitter tests

* Feedback

* Update emitarm64.cpp

* Feedback

* Handling variations of Vector.Create

* Use Operands iterator instead of edges

* Fix condition

* Simplify

* format

* Fixed IsFloatPositiveZero

* Uncomment

* Updated tests to include Vector64.Create/Vector128.Create for ARM64

* Making implementation of IsFloatPositiveZero explicit

* Update src/coreclr/jit/gentree.cpp

Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
* Feedback

* Update comment

* Update comment

* Do not perform optimization when VN CSE phase

* use ResetHWIntrinsicId

* Assert !optValnumCSE_phase

* Simplify IsVectorZero

* Simplify IsVectorZero

* Simplify some uses of Vector*_get_Zero

* Added another test

* Fixed formatting

* Revert lowering removal

* Update gentree.h

* Feedback

Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
src/coreclr/jit/compiler.h
src/coreclr/jit/gentree.h
src/coreclr/jit/morph.cpp
src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs

index 44c8834..fa5bd60 100644 (file)
@@ -6421,6 +6421,9 @@ private:
     GenTree* fgOptimizeCast(GenTreeCast* cast);
     GenTree* fgOptimizeEqualityComparisonWithConst(GenTreeOp* cmp);
     GenTree* fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp);
+#ifdef FEATURE_HW_INTRINSICS
+    GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
+#endif
     GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
     GenTree* fgOptimizeAddition(GenTreeOp* add);
     GenTree* fgOptimizeMultiply(GenTreeOp* mul);
index 5d37fb3..2647cd6 100644 (file)
@@ -7702,16 +7702,23 @@ inline bool GenTree::IsSIMDZero() const
 //
 inline bool GenTree::IsFloatPositiveZero() const
 {
-    return IsCnsFltOrDbl() && !IsCnsNonZeroFltOrDbl();
+    if (IsCnsFltOrDbl())
+    {
+        // This implementation is almost identical to IsCnsNonZeroFltOrDbl
+        // but it is easier to parse out
+        // rather than using !IsCnsNonZeroFltOrDbl.
+        double constValue = AsDblCon()->gtDconVal;
+        return *(__int64*)&constValue == 0;
+    }
+
+    return false;
 }
 
 //-------------------------------------------------------------------
-// IsVectorZero: returns true if this is an integral or floating-point (SIMD or HW intrinsic) vector
-// with all its elements equal to zero.
+// IsVectorZero: returns true if this node is a HWIntrinsic that is Vector*_get_Zero.
 //
 // Returns:
-//     True if this represents an integral or floating-point const (SIMD or HW intrinsic) vector with all its elements
-//     equal to zero.
+//     True if this represents a HWIntrinsic node that is Vector*_get_Zero.
 //
 // TODO: We already have IsSIMDZero() and IsIntegralConstVector(0),
 //       however, IsSIMDZero() does not cover hardware intrinsics, and IsIntegralConstVector(0) does not cover floating
@@ -7720,46 +7727,17 @@ inline bool GenTree::IsFloatPositiveZero() const
 //       separate ones; preferably this one.
 inline bool GenTree::IsVectorZero() const
 {
-#ifdef FEATURE_SIMD
-    if (gtOper == GT_SIMD)
-    {
-        const GenTreeSIMD* node = AsSIMD();
-
-        if (node->GetSIMDIntrinsicId() == SIMDIntrinsicInit)
-        {
-            return (node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero());
-        }
-    }
-#endif
-
 #ifdef FEATURE_HW_INTRINSICS
     if (gtOper == GT_HWINTRINSIC)
     {
-        const GenTreeHWIntrinsic* node         = AsHWIntrinsic();
-        const var_types           simdBaseType = node->GetSimdBaseType();
-
-        if (varTypeIsIntegral(simdBaseType) || varTypeIsFloating(simdBaseType))
-        {
-            const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
+        const GenTreeHWIntrinsic* node        = AsHWIntrinsic();
+        const NamedIntrinsic      intrinsicId = node->GetHWIntrinsicId();
 
-            if (node->GetOperandCount() == 0)
-            {
-#if defined(TARGET_XARCH)
-                return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
-#elif defined(TARGET_ARM64)
-                return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
-#endif // !TARGET_XARCH && !TARGET_ARM64
-            }
-            else if ((node->GetOperandCount() == 1) &&
-                     (node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero()))
-            {
 #if defined(TARGET_XARCH)
-                return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create);
+        return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
 #elif defined(TARGET_ARM64)
-                return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create);
+        return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
 #endif // !TARGET_XARCH && !TARGET_ARM64
-            }
-        }
     }
 #endif // FEATURE_HW_INTRINSICS
 
index 414b82f..ed702b8 100644 (file)
@@ -13516,6 +13516,89 @@ GenTree* Compiler::fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp)
     return cmp;
 }
 
+#ifdef FEATURE_HW_INTRINSICS
+
+//------------------------------------------------------------------------
+// fgOptimizeHWIntrinsic: optimize a HW intrinsic node
+//
+// Arguments:
+//    node - HWIntrinsic node to examine
+//
+// Returns:
+//    The original node if no optimization happened or if tree bashing occured.
+//    An alternative tree if an optimization happened.
+//
+// Notes:
+//    Checks for HWIntrinsic nodes: Vector64.Create/Vector128.Create/Vector256.Create,
+//    and if the call is one of these, attempt to optimize.
+//    This is post-order, meaning that it will not morph the children.
+//
+GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
+{
+    assert(!optValnumCSE_phase);
+
+    if (opts.OptimizationDisabled())
+    {
+        return node;
+    }
+
+    switch (node->GetHWIntrinsicId())
+    {
+        case NI_Vector128_Create:
+#if defined(TARGET_XARCH)
+        case NI_Vector256_Create:
+#elif defined(TARGET_ARM64)
+        case NI_Vector64_Create:
+#endif
+        {
+            bool hwAllArgsAreConstZero = true;
+            for (GenTree* arg : node->Operands())
+            {
+                if (!arg->IsIntegralConst(0) && !arg->IsFloatPositiveZero())
+                {
+                    hwAllArgsAreConstZero = false;
+                    break;
+                }
+            }
+
+            if (hwAllArgsAreConstZero)
+            {
+                switch (node->GetHWIntrinsicId())
+                {
+                    case NI_Vector128_Create:
+                    {
+                        node->ResetHWIntrinsicId(NI_Vector128_get_Zero);
+                        break;
+                    }
+#if defined(TARGET_XARCH)
+                    case NI_Vector256_Create:
+                    {
+                        node->ResetHWIntrinsicId(NI_Vector256_get_Zero);
+                        break;
+                    }
+#elif defined(TARGET_ARM64)
+                    case NI_Vector64_Create:
+                    {
+                        node->ResetHWIntrinsicId(NI_Vector64_get_Zero);
+                        break;
+                    }
+#endif
+                    default:
+                        unreached();
+                }
+            }
+            break;
+        }
+
+        default:
+            break;
+    }
+
+    return node;
+}
+
+#endif
+
 //------------------------------------------------------------------------
 // fgOptimizeCommutativeArithmetic: Optimizes commutative operations.
 //
@@ -14447,6 +14530,13 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp)
     }
 #endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
 
+#ifdef FEATURE_HW_INTRINSICS
+    if (multiOp->OperIsHWIntrinsic() && !optValnumCSE_phase)
+    {
+        return fgOptimizeHWIntrinsic(multiOp->AsHWIntrinsic());
+    }
+#endif
+
     return multiOp;
 }
 #endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)
index 6e99557..9524e8d 100644 (file)
@@ -53,6 +53,30 @@ class Program
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<int> AdvSimd_CompareEqual_Vector64_Int32_CreateZero(Vector64<int> left)
+    {
+        return AdvSimd.CompareEqual(left, Vector64.Create(0));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<int> AdvSimd_CompareEqual_Vector64_Int32_CreateZeroZero(Vector64<int> left)
+    {
+        return AdvSimd.CompareEqual(left, Vector64.Create(0, 0));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<float> AdvSimd_CompareEqual_Vector64_Single_CreateZero(Vector64<float> left)
+    {
+        return AdvSimd.CompareEqual(left, Vector64.Create(0f));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<float> AdvSimd_CompareEqual_Vector64_Single_CreateZeroZero(Vector64<float> left)
+    {
+        return AdvSimd.CompareEqual(left, Vector64.Create(0f, 0f));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector128<byte> AdvSimd_CompareEqual_Vector128_Byte_Zero(Vector128<byte> left)
     {
         return AdvSimd.CompareEqual(left, Vector128<byte>.Zero);
@@ -95,6 +119,37 @@ class Program
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<int> AdvSimd_CompareEqual_Vector128_Int32_CreateZero(Vector128<int> left)
+    {
+        return AdvSimd.CompareEqual(left, Vector128.Create(0));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<int> AdvSimd_CompareEqual_Vector128_Int32_CreateZeroZeroZeroZero(Vector128<int> left)
+    {
+        return AdvSimd.CompareEqual(left, Vector128.Create(0, 0, 0, 0));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<float> AdvSimd_CompareEqual_Vector128_Single_CreateZero(Vector128<float> left)
+    {
+        return AdvSimd.CompareEqual(left, Vector128.Create(0f));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<float> AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero(Vector128<float> left)
+    {
+        return AdvSimd.CompareEqual(left, Vector128.Create(0f, 0f, 0f, 0f));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<float> AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero_AsVariable(Vector128<float> left)
+    {
+        var asVar = Vector128.Create(0f, 0f, 0f, 0f);
+        return AdvSimd.CompareEqual(left, asVar);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector128<double> AdvSimd_Arm64_CompareEqual_Vector128_Double_Zero(Vector128<double> left)
     {
         return AdvSimd.Arm64.CompareEqual(left, Vector128<double>.Zero);
@@ -432,6 +487,8 @@ class Program
 
         // Begin CompareEqual Tests
 
+        // Vector64
+
         if (!ValidateResult_Vector64<byte>(AdvSimd_CompareEqual_Vector64_Byte_Zero(Vector64<byte>.Zero), Byte.MaxValue))
             result = -1;
 
@@ -453,6 +510,22 @@ class Program
         if (!ValidateResult_Vector64<float>(AdvSimd_CompareEqual_Vector64_Single_Zero(Vector64<float>.Zero), Single.NaN))
             result = -1;
 
+        // Vector64.Create
+
+        if (!ValidateResult_Vector64<int>(AdvSimd_CompareEqual_Vector64_Int32_CreateZero(Vector64<int>.Zero), -1))
+            result = -1;
+
+        if (!ValidateResult_Vector64<float>(AdvSimd_CompareEqual_Vector64_Single_CreateZero(Vector64<float>.Zero), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector64<int>(AdvSimd_CompareEqual_Vector64_Int32_CreateZeroZero(Vector64<int>.Zero), -1))
+            result = -1;
+
+        if (!ValidateResult_Vector64<float>(AdvSimd_CompareEqual_Vector64_Single_CreateZeroZero(Vector64<float>.Zero), Single.NaN))
+            result = -1;
+
+        // Vector128
+
         if (!ValidateResult_Vector128<byte>(AdvSimd_CompareEqual_Vector128_Byte_Zero(Vector128<byte>.Zero), Byte.MaxValue))
             result = -1;
 
@@ -474,6 +547,23 @@ class Program
         if (!ValidateResult_Vector128<float>(AdvSimd_CompareEqual_Vector128_Single_Zero(Vector128<float>.Zero), Single.NaN))
             result = -1;
 
+        // Vector128.Create
+
+        if (!ValidateResult_Vector128<int>(AdvSimd_CompareEqual_Vector128_Int32_CreateZero(Vector128<int>.Zero), -1))
+            result = -1;
+
+        if (!ValidateResult_Vector128<float>(AdvSimd_CompareEqual_Vector128_Single_CreateZero(Vector128<float>.Zero), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<int>(AdvSimd_CompareEqual_Vector128_Int32_CreateZeroZeroZeroZero(Vector128<int>.Zero), -1))
+            result = -1;
+
+        if (!ValidateResult_Vector128<float>(AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero(Vector128<float>.Zero), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<float>(AdvSimd_CompareEqual_Vector128_Single_CreateZeroZeroZeroZero_AsVariable(Vector128<float>.Zero), Single.NaN))
+            result = -1;
+
         // End CompareEqual Tests
 
         // Begin CompareGreaterThan Tests
@@ -589,6 +679,8 @@ class Program
 
         // Begin CompareEqual Tests
 
+        // Vector64
+
         if (!ValidateResult_Vector64<byte>(AdvSimd_CompareEqual_Vector64_Byte_Zero_Swapped(Vector64<byte>.Zero), Byte.MaxValue))
             result = -1;
 
@@ -610,6 +702,8 @@ class Program
         if (!ValidateResult_Vector64<float>(AdvSimd_CompareEqual_Vector64_Single_Zero_Swapped(Vector64<float>.Zero), Single.NaN))
             result = -1;
 
+        // Vector128
+
         if (!ValidateResult_Vector128<byte>(AdvSimd_CompareEqual_Vector128_Byte_Zero_Swapped(Vector128<byte>.Zero), Byte.MaxValue))
             result = -1;
 
@@ -642,6 +736,8 @@ class Program
 
         // Begin CompareEqual Tests
 
+        // Vector128
+
         if (!ValidateResult_Vector128<double>(AdvSimd_Arm64_CompareEqual_Vector128_Double_Zero(Vector128<double>.Zero), Double.NaN))
             result = -1;
 
@@ -651,6 +747,8 @@ class Program
         if (!ValidateResult_Vector128<long>(AdvSimd_Arm64_CompareEqual_Vector128_Int64_Zero(Vector128<long>.Zero), -1))
             result = -1;
 
+        // Vector64
+
         if (!ValidateResult_Vector64<float>(AdvSimd_Arm64_CompareEqualScalar_Vector64_Single_Zero(Vector64<float>.Zero), Vector64.CreateScalar(Single.NaN)))
             result = -1;
 
@@ -674,6 +772,8 @@ class Program
 
         // Begin CompareEqual Tests
 
+        // Vector128
+
         if (!ValidateResult_Vector128<double>(AdvSimd_Arm64_CompareEqual_Vector128_Double_Zero_Swapped(Vector128<double>.Zero), Double.NaN))
             result = -1;
 
@@ -683,6 +783,8 @@ class Program
         if (!ValidateResult_Vector128<long>(AdvSimd_Arm64_CompareEqual_Vector128_Int64_Zero_Swapped(Vector128<long>.Zero), -1))
             result = -1;
 
+        // Vector64
+
         if (!ValidateResult_Vector64<float>(AdvSimd_Arm64_CompareEqualScalar_Vector64_Single_Zero_Swapped(Vector64<float>.Zero), Vector64.CreateScalar(Single.NaN)))
             result = -1;