Marking Vector128<T>.Count and Vector256<T>.Count as [Intrinsic] (#24991)
author Tanner Gooding <tagoo@outlook.com>
Thu, 6 Jun 2019 23:06:22 +0000 (16:06 -0700)
committer GitHub <noreply@github.com>
Thu, 6 Jun 2019 23:06:22 +0000 (16:06 -0700)
* Marking Vector128<T>.Count and Vector256<T>.Count as [Intrinsic]

* Fixing NI_Vector128_Count and NI_Vector256_Count to use clsHnd when getting the simdSize and baseType

* Applying the formatting patch.

* Changing some comments to just be "vector element count".

* Fixing impBaseIntrinsic to set the baseType so Vector128_Count and Vector256_Count don't return nullptr

src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs
src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs
src/jit/compiler.h
src/jit/gentree.cpp
src/jit/hwintrinsicArm64.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/importer.cpp
src/jit/optimizer.cpp

index cdcddcb..9f609ea 100644 (file)
@@ -39,6 +39,7 @@ namespace System.Runtime.Intrinsics
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
         public static int Count
         {
+            [Intrinsic]
             get
             {
                 ThrowHelper.ThrowForUnsupportedVectorBaseType<T>();
index 2ed5516..0a9b993 100644 (file)
@@ -41,6 +41,7 @@ namespace System.Runtime.Intrinsics
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
         public static int Count
         {
+            [Intrinsic]
             get
             {
                 ThrowHelper.ThrowForUnsupportedVectorBaseType<T>();
index c73ca30..da42aa1 100644 (file)
@@ -3527,6 +3527,7 @@ protected:
 
 #ifdef FEATURE_HW_INTRINSICS
     GenTree* impHWIntrinsic(NamedIntrinsic        intrinsic,
+                            CORINFO_CLASS_HANDLE  clsHnd,
                             CORINFO_METHOD_HANDLE method,
                             CORINFO_SIG_INFO*     sig,
                             bool                  mustExpand);
@@ -3540,6 +3541,7 @@ protected:
 
 #ifdef _TARGET_XARCH_
     GenTree* impBaseIntrinsic(NamedIntrinsic        intrinsic,
+                              CORINFO_CLASS_HANDLE  clsHnd,
                               CORINFO_METHOD_HANDLE method,
                               CORINFO_SIG_INFO*     sig,
                               bool                  mustExpand);
@@ -5770,7 +5772,7 @@ public:
 #define LPFLG_VAR_LIMIT 0x0100    // iterator is compared with a local var (var # found in lpVarLimit)
 #define LPFLG_CONST_LIMIT 0x0200  // iterator is compared with a constant (found in lpConstLimit)
 #define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit)
-#define LPFLG_SIMD_LIMIT 0x0080   // iterator is compared with Vector<T>.Count (found in lpConstLimit)
+#define LPFLG_SIMD_LIMIT 0x0080   // iterator is compared with vector element count (found in lpConstLimit)
 
 #define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop
 #define LPFLG_REMOVED 0x1000     // has been removed from the loop table (unrolled or optimized away)
index a6bbf81..df2bdfe 100644 (file)
@@ -10196,7 +10196,7 @@ void Compiler::gtDispConst(GenTree* tree)
 #ifdef FEATURE_SIMD
                 if ((tree->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
                 {
-                    printf(" Vector<T>.Count");
+                    printf(" vector element count");
                 }
 #endif
 
index dec6038..1cd9024 100644 (file)
@@ -321,6 +321,7 @@ int HWIntrinsicInfo::lookupNumArgs(const GenTreeHWIntrinsic* node)
 //    the expanded intrinsic.
 //
 GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
+                                  CORINFO_CLASS_HANDLE  clsHnd,
                                   CORINFO_METHOD_HANDLE method,
                                   CORINFO_SIG_INFO*     sig,
                                   bool                  mustExpand)
index 447d401..c00a92e 100644 (file)
@@ -38,6 +38,7 @@ HARDWARE_INTRINSIC(Vector128_AsSingle,                              "AsSingle",
 HARDWARE_INTRINSIC(Vector128_AsUInt16,                              "AsUInt16",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128_AsUInt32,                              "AsUInt32",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128_AsUInt64,                              "AsUInt64",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128_Count,                                 "get_Count",                             Vector128,          -1,              16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128_CreateScalarUnsafe,                    "CreateScalarUnsafe",                    Vector128,          -1,              16,           1,     {INS_mov_i2xmm,         INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_movss,          INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128_GetElement,                            "GetElement",                            Vector128,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128_WithElement,                           "WithElement",                           Vector128,          -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
@@ -62,6 +63,7 @@ HARDWARE_INTRINSIC(Vector256_AsSingle,                              "AsSingle",
 HARDWARE_INTRINSIC(Vector256_AsUInt16,                              "AsUInt16",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256_AsUInt32,                              "AsUInt32",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256_AsUInt64,                              "AsUInt64",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256_Count,                                 "get_Count",                             Vector256,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256_CreateScalarUnsafe,                    "CreateScalarUnsafe",                    Vector256,          -1,              32,           1,     {INS_mov_i2xmm,         INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_movss,          INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256_GetElement,                            "GetElement",                            Vector256,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256_WithElement,                           "WithElement",                           Vector256,          -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
index 7835454..3b0c6f8 100644 (file)
@@ -757,6 +757,7 @@ static bool impIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicC
 //    the expanded intrinsic.
 //
 GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
+                                  CORINFO_CLASS_HANDLE  clsHnd,
                                   CORINFO_METHOD_HANDLE method,
                                   CORINFO_SIG_INFO*     sig,
                                   bool                  mustExpand)
@@ -954,7 +955,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
     {
         case InstructionSet_Vector128:
         case InstructionSet_Vector256:
-            return impBaseIntrinsic(intrinsic, method, sig, mustExpand);
+            return impBaseIntrinsic(intrinsic, clsHnd, method, sig, mustExpand);
         case InstructionSet_SSE:
             return impSSEIntrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_SSE2:
@@ -1002,6 +1003,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
 //    the expanded intrinsic.
 //
 GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
+                                    CORINFO_CLASS_HANDLE  clsHnd,
                                     CORINFO_METHOD_HANDLE method,
                                     CORINFO_SIG_INFO*     sig,
                                     bool                  mustExpand)
@@ -1035,12 +1037,15 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             retType = getSIMDTypeForSize(retSimdSize);
         }
     }
-    else
+    else if (retType == TYP_STRUCT)
     {
-        assert(retType == TYP_STRUCT);
         baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize);
         retType  = getSIMDTypeForSize(simdSize);
     }
+    else
+    {
+        baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize);
+    }
 
     if (!varTypeIsArithmetic(baseType))
     {
@@ -1094,6 +1099,17 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector128_Count:
+        case NI_Vector256_Count:
+        {
+            assert(sig->numArgs == 0);
+
+            GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT);
+            countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
+            retNode = countNode;
+            break;
+        }
+
         case NI_Vector128_CreateScalarUnsafe:
         {
             assert(sig->numArgs == 1);
index 993889d..83942d6 100644 (file)
@@ -3500,7 +3500,7 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
 
             if ((ni > NI_HW_INTRINSIC_START) && (ni < NI_HW_INTRINSIC_END))
             {
-                GenTree* hwintrinsic = impHWIntrinsic(ni, method, sig, mustExpand);
+                GenTree* hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig, mustExpand);
 
                 if (mustExpand && (hwintrinsic == nullptr))
                 {
index 7749e92..00e518a 100644 (file)
@@ -3553,7 +3553,7 @@ void Compiler::optUnrollLoops()
         if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
         {
             // In stress mode, quadruple the size limit, and drop
-            // the restriction that loop limit must be Vector<T>.Count.
+            // the restriction that loop limit must be vector element count.
 
             unrollLimitSz *= 4;
             requiredFlags &= ~LPFLG_SIMD_LIMIT;