Optimize ToScalar() and GetElement() to use arm64 intrinsic (#36156)
author Kunal Pathak <Kunal.Pathak@microsoft.com>
Thu, 14 May 2020 21:06:45 +0000 (14:06 -0700)
committer GitHub <noreply@github.com>
Thu, 14 May 2020 21:06:45 +0000 (14:06 -0700)
* ARM64 intrinsic for ToScalar() and GetElement()

* Fixed GetElement to just operate on constants

* Fix bug in rationalize for Vector64<long>

* fix NotSupported issue for GetElement and ToScalar

* Reuse the baseType/retType in impSpecialIntrinsic and impBaseIntrinsic

* Update comment

* fix breaks

* add comments

* ran jit-format

* Refactored to move common logic inside isSupportedBaseType

* review comments

* reuse simdSize

* formatting

* one missing formatting

src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/hwintrinsic.cpp
src/coreclr/src/jit/hwintrinsicarm64.cpp
src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp
src/coreclr/src/jit/hwintrinsiclistarm64.h
src/coreclr/src/jit/hwintrinsicxarch.cpp
src/coreclr/src/jit/lowerarmarch.cpp
src/coreclr/src/jit/rationalize.cpp
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs

index a8e6220..b76f905 100644 (file)
@@ -3752,7 +3752,10 @@ protected:
     GenTree* impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                                  CORINFO_CLASS_HANDLE  clsHnd,
                                  CORINFO_METHOD_HANDLE method,
-                                 CORINFO_SIG_INFO*     sig);
+                                 CORINFO_SIG_INFO*     sig,
+                                 var_types             baseType,
+                                 var_types             retType,
+                                 unsigned              simdSize);
 
     GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr = false);
     GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType);
@@ -3762,7 +3765,10 @@ protected:
     GenTree* impBaseIntrinsic(NamedIntrinsic        intrinsic,
                               CORINFO_CLASS_HANDLE  clsHnd,
                               CORINFO_METHOD_HANDLE method,
-                              CORINFO_SIG_INFO*     sig);
+                              CORINFO_SIG_INFO*     sig,
+                              var_types             baseType,
+                              var_types             retType,
+                              unsigned              simdSize);
     GenTree* impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
     GenTree* impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
     GenTree* impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
index e0e8350..d0b2c1b 100644 (file)
@@ -584,6 +584,40 @@ static bool impIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicC
 }
 
 //------------------------------------------------------------------------
+// isSupportedBaseType: Check whether baseType is supported for the given intrinsic
+//
+// Arguments:
+//    intrinsic - HW intrinsic id; only consulted by the asserts on the unsupported path.
+//    baseType  - Base type of the intrinsic.
+//
+// Return Value:
+//    true if baseType is an arithmetic type (varTypeIsArithmetic), false otherwise.
+//
+static bool isSupportedBaseType(NamedIntrinsic intrinsic, var_types baseType)
+{
+    // The intrinsic id is only consulted on the false path (by the asserts below): we expect that
+    // the exposed managed signatures are either generic and support all types,
+    // or they are explicit and support the type indicated.
+    if (varTypeIsArithmetic(baseType))
+    {
+        return true;
+    }
+
+#ifdef TARGET_XARCH
+    assert((intrinsic >= NI_Vector128_As && intrinsic <= NI_Vector128_AsUInt64) ||
+           (intrinsic >= NI_Vector128_get_AllBitsSet && intrinsic <= NI_Vector128_ToVector256Unsafe) ||
+           (intrinsic >= NI_Vector256_As && intrinsic <= NI_Vector256_AsUInt64) ||
+           (intrinsic >= NI_Vector256_get_AllBitsSet && intrinsic <= NI_Vector256_ToScalar));
+#else
+    assert((intrinsic >= NI_Vector64_AsByte && intrinsic <= NI_Vector64_AsUInt32) ||
+           (intrinsic >= NI_Vector64_get_AllBitsSet && intrinsic <= NI_Vector64_ToScalar) ||
+           (intrinsic >= NI_Vector128_As && intrinsic <= NI_Vector128_AsUInt64) ||
+           (intrinsic >= NI_Vector128_get_AllBitsSet && intrinsic <= NI_Vector128_ToScalar));
+#endif
+    return false;
+}
+
+//------------------------------------------------------------------------
 // impHWIntrinsic: Import a hardware intrinsic as a GT_HWINTRINSIC node if possible
 //
 // Arguments:
@@ -614,9 +648,38 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
         baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
         retType  = getSIMDTypeForSize(sizeBytes);
         assert(sizeBytes != 0);
+
+        // We want to return early here for cases where retType was TYP_STRUCT as per the method signature,
+        // rather than deferring the decision until after getting the baseType of the arg.
+        if (!isSupportedBaseType(intrinsic, baseType))
+        {
+            return nullptr;
+        }
+    }
+
+    baseType = getBaseTypeFromArgIfNeeded(intrinsic, clsHnd, sig, baseType);
+
+    if (baseType == TYP_UNKNOWN)
+    {
+        if (category != HW_Category_Scalar)
+        {
+            unsigned int sizeBytes;
+            baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &sizeBytes);
+            assert((category == HW_Category_Special) || (sizeBytes != 0));
+        }
+        else
+        {
+            baseType = retType;
+        }
+    }
+
+    // Immediately return if the category is other than scalar/special and this is not a supported base type.
+    if ((category != HW_Category_Special) && (category != HW_Category_Scalar) &&
+        !isSupportedBaseType(intrinsic, baseType))
+    {
+        return nullptr;
     }
 
-    baseType          = getBaseTypeFromArgIfNeeded(intrinsic, clsHnd, sig, baseType);
     unsigned simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
 
     GenTree* immOp = nullptr;
@@ -836,7 +899,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
         return retNode;
     }
 
-    return impSpecialIntrinsic(intrinsic, clsHnd, method, sig);
+    return impSpecialIntrinsic(intrinsic, clsHnd, method, sig, baseType, retType, simdSize);
 }
 
 #endif // FEATURE_HW_INTRINSICS
index 7127b69..a7d5c21 100644 (file)
@@ -207,6 +207,8 @@ int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic intrinsic, int simdSize,
             case NI_AdvSimd_ExtractVector64:
             case NI_AdvSimd_Insert:
             case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128:
+            case NI_Vector64_GetElement:
+            case NI_Vector128_GetElement:
                 immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType);
                 break;
 
@@ -260,7 +262,9 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT
 //    intrinsic  -- id of the intrinsic function.
 //    clsHnd     -- class handle containing the intrinsic function.
 //    method     -- method handle of the intrinsic function.
-//    sig        -- signature of the intrinsic call
+//    sig        -- signature of the intrinsic call.
+//    baseType   -- generic argument of the intrinsic.
+//    retType    -- return type of the intrinsic.
 //
 // Return Value:
 //    The GT_HWINTRINSIC node, or nullptr if not a supported intrinsic
@@ -268,52 +272,16 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT
 GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                                        CORINFO_CLASS_HANDLE  clsHnd,
                                        CORINFO_METHOD_HANDLE method,
-                                       CORINFO_SIG_INFO*     sig)
+                                       CORINFO_SIG_INFO*     sig,
+                                       var_types             baseType,
+                                       var_types             retType,
+                                       unsigned              simdSize)
 {
     HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsic);
     int                 numArgs  = sig->numArgs;
-    var_types           retType  = JITtype2varType(sig->retType);
-    var_types           baseType = TYP_UNKNOWN;
 
-    if ((retType == TYP_STRUCT) && featureSIMD)
-    {
-        unsigned int sizeBytes;
-        baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
-        retType  = getSIMDTypeForSize(sizeBytes);
-        assert(sizeBytes != 0);
-
-        if (!varTypeIsArithmetic(baseType))
-        {
-            assert((intrinsic == NI_Vector64_AsByte) || (intrinsic == NI_Vector128_As) ||
-                   (intrinsic == NI_Vector64_get_Zero) || (intrinsic == NI_Vector64_get_AllBitsSet) ||
-                   (intrinsic == NI_Vector128_get_Zero) || (intrinsic == NI_Vector128_get_AllBitsSet));
-            return nullptr;
-        }
-    }
-
-    baseType = getBaseTypeFromArgIfNeeded(intrinsic, clsHnd, sig, baseType);
-
-    if (baseType == TYP_UNKNOWN)
-    {
-        if (category != HW_Category_Scalar)
-        {
-            unsigned int sizeBytes;
-            baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &sizeBytes);
-            assert(sizeBytes != 0);
-        }
-        else
-        {
-            baseType = retType;
-        }
-    }
-
-    if (!varTypeIsArithmetic(baseType))
-    {
-        return nullptr;
-    }
-
-    unsigned simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
     assert(numArgs >= 0);
+    assert(varTypeIsArithmetic(baseType));
 
     GenTree* retNode = nullptr;
     GenTree* op1     = nullptr;
index 7aca222..e2093a5 100644 (file)
@@ -617,6 +617,29 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             }
             break;
 
+            case NI_Vector64_GetElement:
+            case NI_Vector128_GetElement:
+            case NI_Vector64_ToScalar:
+            case NI_Vector128_ToScalar:
+            {
+                ssize_t indexValue = 0;
+                if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement))
+                {
+                    assert(intrin.op2->IsCnsIntOrI());
+                    indexValue = intrin.op2->AsIntCon()->gtIconVal;
+                }
+
+                // no-op if vector is float/double, targetReg == op1Reg and fetching for 0th index.
+                if ((varTypeIsFloating(intrin.baseType) && (targetReg == op1Reg) && (indexValue == 0)))
+                {
+                    break;
+                }
+
+                GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, indexValue,
+                                            INS_OPTS_NONE);
+            }
+            break;
+
             default:
                 unreached();
         }
index 0ca6f8d..80a5f48 100644 (file)
@@ -28,6 +28,8 @@ HARDWARE_INTRINSIC(Vector64,        CreateScalarUnsafe,
 HARDWARE_INTRINSIC(Vector64,        get_AllBitsSet,                             8,           0,     {INS_mvni,              INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},              HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector64,        get_Count,                                  8,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,        get_Zero,                                   8,           0,     {INS_movi,              INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi},              HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(Vector64,        GetElement,                                 8,           2,     {INS_smov,              INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},               HW_Category_IMM,                    HW_Flag_NoJmpTableIMM|HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(Vector64,        ToScalar,                                   8,           1,     {INS_smov,              INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},               HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                            SIMD size       NumArg                                                                                                     Instructions                                                                                                     Category                            Flags
@@ -50,6 +52,8 @@ HARDWARE_INTRINSIC(Vector128,       CreateScalarUnsafe,                        1
 HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                            16,           0,     {INS_mvni,              INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},              HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector128,       get_Count,                                 16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       get_Zero,                                  16,           0,     {INS_movi,              INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi,           INS_movi},              HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(Vector128,       GetElement,                                16,           2,     {INS_smov,              INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},               HW_Category_IMM,                    HW_Flag_NoJmpTableIMM|HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(Vector128,       ToScalar,                                  16,           1,     {INS_smov,              INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},               HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                            SIMD size       NumArg                                                                                                     Instructions                                                                                                     Category                            Flags
index 1ed4dd7..214ce21 100644 (file)
@@ -469,22 +469,26 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT
 //    intrinsic  -- id of the intrinsic function.
 //    clsHnd     -- class handle containing the intrinsic function.
 //    method     -- method handle of the intrinsic function.
-//    sig        -- signature of the intrinsic call
-//
+//    sig        -- signature of the intrinsic call.
+//    baseType   -- generic argument of the intrinsic.
+//    retType    -- return type of the intrinsic.
 // Return Value:
 //    the expanded intrinsic.
 //
 GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                                        CORINFO_CLASS_HANDLE  clsHnd,
                                        CORINFO_METHOD_HANDLE method,
-                                       CORINFO_SIG_INFO*     sig)
+                                       CORINFO_SIG_INFO*     sig,
+                                       var_types             baseType,
+                                       var_types             retType,
+                                       unsigned              simdSize)
 {
     // other intrinsics need special importation
     switch (HWIntrinsicInfo::lookupIsa(intrinsic))
     {
         case InstructionSet_Vector128:
         case InstructionSet_Vector256:
-            return impBaseIntrinsic(intrinsic, clsHnd, method, sig);
+            return impBaseIntrinsic(intrinsic, clsHnd, method, sig, baseType, retType, simdSize);
         case InstructionSet_SSE:
             return impSSEIntrinsic(intrinsic, method, sig);
         case InstructionSet_SSE2:
@@ -509,15 +513,19 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
 // Arguments:
 //    intrinsic  -- id of the intrinsic function.
 //    method     -- method handle of the intrinsic function.
-//    sig        -- signature of the intrinsic call
-//
+//    sig        -- signature of the intrinsic call.
+//    baseType   -- generic argument of the intrinsic.
+//    retType    -- return type of the intrinsic.
 // Return Value:
 //    the expanded intrinsic.
 //
 GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
                                     CORINFO_CLASS_HANDLE  clsHnd,
                                     CORINFO_METHOD_HANDLE method,
-                                    CORINFO_SIG_INFO*     sig)
+                                    CORINFO_SIG_INFO*     sig,
+                                    var_types             baseType,
+                                    var_types             retType,
+                                    unsigned              simdSize)
 {
     GenTree* retNode = nullptr;
     GenTree* op1     = nullptr;
@@ -528,42 +536,6 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
         return nullptr;
     }
 
-    unsigned  simdSize = 0;
-    var_types baseType = TYP_UNKNOWN;
-    var_types retType  = JITtype2varType(sig->retType);
-
-    assert(!sig->hasThis());
-
-    if (HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic))
-    {
-        baseType = getBaseTypeAndSizeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args), &simdSize);
-
-        if (retType == TYP_STRUCT)
-        {
-            unsigned  retSimdSize = 0;
-            var_types retBasetype = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &retSimdSize);
-            if (!varTypeIsArithmetic(retBasetype))
-            {
-                return nullptr;
-            }
-            retType = getSIMDTypeForSize(retSimdSize);
-        }
-    }
-    else if (retType == TYP_STRUCT)
-    {
-        baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize);
-        retType  = getSIMDTypeForSize(simdSize);
-    }
-    else
-    {
-        baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize);
-    }
-
-    if (!varTypeIsArithmetic(baseType))
-    {
-        return nullptr;
-    }
-
     switch (intrinsic)
     {
         case NI_Vector256_As:
@@ -618,7 +590,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             if (getSIMDVectorRegisterByteLength() == YMM_REGSIZE_BYTES)
             {
                 // Vector<T> is TYP_SIMD32, so we should treat this as a call to Vector128.ToVector256
-                return impBaseIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig);
+                return impBaseIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, baseType, retType, simdSize);
             }
 
             assert(getSIMDVectorRegisterByteLength() == XMM_REGSIZE_BYTES);
@@ -659,6 +631,11 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Vector128_AsVector128:
         {
             assert(sig->numArgs == 1);
+            assert(HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic));
+
+            var_types baseTypeOfIntrinsic =
+                getBaseTypeAndSizeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args), &simdSize);
+            assert(baseType == baseTypeOfIntrinsic);
 
             switch (getSIMDTypeForSize(simdSize))
             {
@@ -686,7 +663,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
                 case TYP_SIMD32:
                 {
                     // Vector<T> is TYP_SIMD32, so we should treat this as a call to Vector256.GetLower
-                    return impBaseIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig);
+                    return impBaseIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig, baseType, retType, simdSize);
                 }
 
                 default:
@@ -725,12 +702,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
 
                 if (intrinsic == NI_Vector256_AsVector)
                 {
-                    return impBaseIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig);
+                    return impBaseIntrinsic(NI_Vector256_GetLower, clsHnd, method, sig, baseType, retType, simdSize);
                 }
                 else
                 {
                     assert(intrinsic == NI_Vector256_AsVector256);
-                    return impBaseIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig);
+                    return impBaseIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, baseType, retType, simdSize);
                 }
             }
 
index d5272ab..899531b 100644 (file)
@@ -1160,6 +1160,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
         case NI_AdvSimd_DuplicateSelectedScalarToVector128:
         case NI_AdvSimd_Extract:
         case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128:
+        case NI_Vector64_GetElement:
+        case NI_Vector128_GetElement:
             if (intrin.op2->IsCnsIntOrI())
             {
                 MakeSrcContained(node, intrin.op2);
index 07bdd29..d2f8134 100644 (file)
@@ -785,10 +785,18 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge
             // type(s).
             if ((hwIntrinsicNode->gtType == TYP_I_IMPL) && (hwIntrinsicNode->gtSIMDSize == TARGET_POINTER_SIZE))
             {
-                // This happens when it is consumed by a GT_RET_EXPR.
-                // It can only be a Vector2f or Vector2i.
-                assert(genTypeSize(hwIntrinsicNode->gtSIMDBaseType) == 4);
-                hwIntrinsicNode->gtType = TYP_SIMD8;
+#ifdef TARGET_ARM64
+                // Special case for GetElement/ToScalar because they take Vector64<T> and return T
+                // and T can be long or ulong.
+                if (!(hwIntrinsicNode->gtHWIntrinsicId == NI_Vector64_GetElement ||
+                      hwIntrinsicNode->gtHWIntrinsicId == NI_Vector64_ToScalar))
+#endif
+                {
+                    // This happens when it is consumed by a GT_RET_EXPR.
+                    // It can only be a Vector2f or Vector2i.
+                    assert(genTypeSize(hwIntrinsicNode->gtSIMDBaseType) == 4);
+                    hwIntrinsicNode->gtType = TYP_SIMD8;
+                }
             }
             break;
         }
index f376ebf..873056c 100644 (file)
@@ -885,6 +885,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>The value of the element at <paramref name="index" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception>
+        [Intrinsic]
         public static T GetElement<T>(this Vector64<T> vector, int index)
             where T : struct
         {
@@ -928,6 +929,7 @@ namespace System.Runtime.Intrinsics
         /// <param name="vector">The vector to get the first element from.</param>
         /// <returns>A scalar <typeparamref name="T" /> containing the value of the first element.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
+        [Intrinsic]
         public static T ToScalar<T>(this Vector64<T> vector)
             where T : struct
         {