Optimize Vector128/256<T>.Get/WithElement (#22353)
authorFei Peng <feipeng.compiler@gmail.com>
Tue, 19 Feb 2019 15:50:15 +0000 (07:50 -0800)
committerTanner Gooding <tagoo@outlook.com>
Tue, 19 Feb 2019 15:50:15 +0000 (07:50 -0800)
src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs
src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs
src/jit/hwintrinsiclistxarch.h
src/jit/importer.cpp
tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_r.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_ro.csproj [new file with mode: 0644]

index 7060504..80da2c4 100644 (file)
@@ -232,6 +232,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>The value of the element at <paramref name="index" />.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than or equal to the number of elements.</exception>
+        [Intrinsic]
         public T GetElement(int index)
         {
             ThrowHelper.ThrowForUnsupportedVectorBaseType<T>();
@@ -251,6 +252,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>A <see cref="Vector128{T}" /> with the value of the element at <paramref name="index" /> set to <paramref name="value" /> and the remaining elements set to the same value as that in the current instance.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than or equal to the number of elements.</exception>
+        [Intrinsic]
         public Vector128<T> WithElement(int index, T value)
         {
             ThrowHelper.ThrowForUnsupportedVectorBaseType<T>();
index 61191cc..968faf1 100644 (file)
@@ -233,6 +233,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>The value of the element at <paramref name="index" />.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than or equal to the number of elements.</exception>
+        [Intrinsic]
         public T GetElement(int index)
         {
             ThrowHelper.ThrowForUnsupportedVectorBaseType<T>();
@@ -252,6 +253,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>A <see cref="Vector256{T}" /> with the value of the element at <paramref name="index" /> set to <paramref name="value" /> and the remaining elements set to the same value as that in the current instance.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than or equal to the number of elements.</exception>
+        [Intrinsic]
         public Vector256<T> WithElement(int index, T value)
         {
             ThrowHelper.ThrowForUnsupportedVectorBaseType<T>();
index 2cd82a5..8e02aab 100644 (file)
@@ -39,6 +39,8 @@ HARDWARE_INTRINSIC(Base_Vector128_AsUInt16,                         "AsUInt16",
 HARDWARE_INTRINSIC(Base_Vector128_AsUInt32,                         "AsUInt32",                                    Base,          -1,              16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector128_AsUInt64,                         "AsUInt64",                                    Base,          -1,              16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector128_CreateScalarUnsafe,               "CreateScalarUnsafe",                          Base,          -1,              16,           1,     {INS_mov_i2xmm,         INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_movss,          INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector128_GetElement,                       "GetElement",                                  Base,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Base_Vector128_WithElement,                      "WithElement",                                 Base,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Base_Vector128_ToScalar,                         "ToScalar",                                    Base,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector128_ToVector256,                      "ToVector256",                                 Base,          -1,              16,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector128_ToVector256Unsafe,                "ToVector256Unsafe",                           Base,          -1,              16,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
@@ -55,6 +57,8 @@ HARDWARE_INTRINSIC(Base_Vector256_AsUInt16,                         "AsUInt16",
 HARDWARE_INTRINSIC(Base_Vector256_AsUInt32,                         "AsUInt32",                                    Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_AsUInt64,                         "AsUInt64",                                    Base,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_CreateScalarUnsafe,               "CreateScalarUnsafe",                          Base,          -1,              32,           1,     {INS_mov_i2xmm,         INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_movss,          INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Base_Vector256_GetElement,                       "GetElement",                                  Base,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Base_Vector256_WithElement,                      "WithElement",                                 Base,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Base_Vector256_GetLower,                         "GetLower",                                    Base,          -1,              32,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_ToScalar,                         "ToScalar",                                    Base,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_Zero,                             "get_Zero",                                    Base,          -1,              32,           0,     {INS_xorps,             INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps},             HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
index 477b124..e617e7e 100644 (file)
@@ -3482,6 +3482,8 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 case NI_Base_Vector128_AsUInt64:
 #if defined(_TARGET_XARCH_)
                 case NI_Base_Vector128_CreateScalarUnsafe:
+                case NI_Base_Vector128_GetElement:
+                case NI_Base_Vector128_WithElement:
                 case NI_Base_Vector128_ToScalar:
                 case NI_Base_Vector128_ToVector256:
                 case NI_Base_Vector128_ToVector256Unsafe:
@@ -3498,6 +3500,8 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
                 case NI_Base_Vector256_AsUInt32:
                 case NI_Base_Vector256_AsUInt64:
                 case NI_Base_Vector256_CreateScalarUnsafe:
+                case NI_Base_Vector256_GetElement:
+                case NI_Base_Vector256_WithElement:
                 case NI_Base_Vector256_GetLower:
                 case NI_Base_Vector256_ToScalar:
                 case NI_Base_Vector256_Zero:
@@ -4390,6 +4394,407 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             }
             break;
         }
+
+        case NI_Base_Vector256_WithElement:
+        {
+            if (!compSupports(InstructionSet_AVX))
+            {
+                // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers
+                return nullptr;
+            }
+            __fallthrough;
+        }
+        case NI_Base_Vector128_WithElement:
+        {
+            assert(sig->numArgs == 2);
+            GenTree* indexOp = impStackTop(1).val;
+            if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst())
+            {
+                // Using software fallback if
+                // 1. JIT/hardware don't support SSE2 instructions
+                // 2. baseType is not a numeric type (the fallback throws the exception)
+                // 3. index is not a constant
+                return nullptr;
+            }
+
+            switch (baseType)
+            {
+                // Using software fallback if baseType is not supported by hardware
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                case TYP_INT:
+                case TYP_UINT:
+                    if (!compSupports(InstructionSet_SSE41))
+                    {
+                        return nullptr;
+                    }
+                    break;
+                case TYP_LONG:
+                case TYP_ULONG:
+                    if (!compSupports(InstructionSet_SSE41_X64))
+                    {
+                        return nullptr;
+                    }
+                    break;
+                case TYP_DOUBLE:
+                case TYP_FLOAT:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    // short/ushort/float/double is supported by SSE2
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            ssize_t imm8       = indexOp->AsIntCon()->IconValue();
+            ssize_t cachedImm8 = imm8;
+            ssize_t count      = simdSize / genTypeSize(baseType);
+
+            if (imm8 >= count || imm8 < 0)
+            {
+                // Using software fallback if index is out of range (the fallback throws the exception)
+                return nullptr;
+            }
+
+            GenTree* valueOp = impPopStack().val;
+            impPopStack();
+            GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd);
+
+            GenTree* clonedVectorOp = nullptr;
+
+            if (simdSize == 32)
+            {
+                // Extract the half vector that will be modified
+                assert(compSupports(InstructionSet_AVX));
+
+                // copy `vectorOp` to accept the modified half vector
+                vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                        nullptr DEBUGARG("Clone Vector for Vector256<T>.WithElement"));
+
+                if (imm8 >= count / 2)
+                {
+                    imm8 -= count / 2;
+                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
+                                                        baseType, simdSize);
+                }
+                else
+                {
+                    vectorOp =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize);
+                }
+            }
+
+            GenTree* immNode = gtNewIconNode(imm8);
+
+            switch (baseType)
+            {
+                case TYP_LONG:
+                case TYP_ULONG:
+                    retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_X64_Insert,
+                                                       baseType, 16);
+                    break;
+
+                case TYP_FLOAT:
+                {
+                    if (!compSupports(InstructionSet_SSE41))
+                    {
+                        // Emulate Vector128<float>.WithElement by SSE instructions
+                        if (imm8 == 0)
+                        {
+                            // vector.WithElement(0, value)
+                            // =>
+                            // movss   xmm0, xmm1 (xmm0 = vector, xmm1 = value)
+                            valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp,
+                                                               NI_Base_Vector128_CreateScalarUnsafe, TYP_FLOAT, 16);
+                            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, NI_SSE_MoveScalar,
+                                                               TYP_FLOAT, 16);
+                        }
+                        else if (imm8 == 1)
+                        {
+                            // vector.WithElement(1, value)
+                            // =>
+                            // shufps  xmm1, xmm0, 0   (xmm0 = vector, xmm1 = value)
+                            // shufps  xmm1, xmm0, 226
+                            GenTree* tmpOp =
+                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
+                                                         TYP_FLOAT, 16);
+                            GenTree* dupVectorOp = nullptr;
+                            vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                                    nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement"));
+                            tmpOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, vectorOp, gtNewIconNode(0),
+                                                             NI_SSE_Shuffle, TYP_FLOAT, 16);
+                            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, dupVectorOp, gtNewIconNode(226),
+                                                               NI_SSE_Shuffle, TYP_FLOAT, 16);
+                        }
+                        else
+                        {
+                            ssize_t controlBits1 = 0;
+                            ssize_t controlBits2 = 0;
+                            if (imm8 == 2)
+                            {
+                                controlBits1 = 48;
+                                controlBits2 = 132;
+                            }
+                            else
+                            {
+                                controlBits1 = 32;
+                                controlBits2 = 36;
+                            }
+                            // vector.WithElement(2, value)
+                            // =>
+                            // shufps  xmm1, xmm0, 48   (xmm0 = vector, xmm1 = value)
+                            // shufps  xmm0, xmm1, 132
+                            //
+                            // vector.WithElement(3, value)
+                            // =>
+                            // shufps  xmm1, xmm0, 32   (xmm0 = vector, xmm1 = value)
+                            // shufps  xmm0, xmm1, 36
+                            GenTree* tmpOp =
+                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
+                                                         TYP_FLOAT, 16);
+                            GenTree* dupVectorOp = nullptr;
+                            vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                                    nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement"));
+                            valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, tmpOp, gtNewIconNode(controlBits1),
+                                                               NI_SSE_Shuffle, TYP_FLOAT, 16);
+                            retNode =
+                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, dupVectorOp, gtNewIconNode(controlBits2),
+                                                         NI_SSE_Shuffle, TYP_FLOAT, 16);
+                        }
+                        break;
+                    }
+                    else
+                    {
+                        valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
+                                                           TYP_FLOAT, 16);
+                        immNode->AsIntCon()->SetIconValue(imm8 * 16);
+                        __fallthrough;
+                    }
+                }
+
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                case TYP_INT:
+                case TYP_UINT:
+                    retNode =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_Insert, baseType, 16);
+                    break;
+
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    retNode =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE2_Insert, baseType, 16);
+                    break;
+
+                case TYP_DOUBLE:
+                {
+                    // vector.WithElement(0, value)
+                    // =>
+                    // movsd   xmm0, xmm1  (xmm0 = vector, xmm1 = value)
+                    //
+                    // vector.WithElement(1, value)
+                    // =>
+                    // unpcklpd  xmm0, xmm1  (xmm0 = vector, xmm1 = value)
+                    valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
+                                                       TYP_DOUBLE, 16);
+                    NamedIntrinsic in = (imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow;
+                    retNode           = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, in, TYP_DOUBLE, 16);
+                    break;
+                }
+
+                default:
+                    unreached();
+                    break;
+            }
+
+            if (simdSize == 32)
+            {
+                assert(clonedVectorOp);
+                int upperOrLower = (cachedImm8 >= count / 2) ? 1 : 0;
+                retNode = gtNewSimdHWIntrinsicNode(retType, clonedVectorOp, retNode, gtNewIconNode(upperOrLower),
+                                                   NI_AVX_InsertVector128, baseType, simdSize);
+            }
+
+            break;
+        }
+
+        case NI_Base_Vector256_GetElement:
+        {
+            if (!compSupports(InstructionSet_AVX))
+            {
+                // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers
+                return nullptr;
+            }
+            __fallthrough;
+        }
+        case NI_Base_Vector128_GetElement:
+        {
+            assert(sig->numArgs == 1);
+            GenTree* indexOp = impStackTop().val;
+            if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst())
+            {
+                // Using software fallback if
+                // 1. JIT/hardware don't support SSE2 instructions
+                // 2. baseType is not a numeric type (the fallback throws the exception)
+                // 3. index is not a constant
+                return nullptr;
+            }
+
+            switch (baseType)
+            {
+                // Using software fallback if baseType is not supported by hardware
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                case TYP_INT:
+                case TYP_UINT:
+                    if (!compSupports(InstructionSet_SSE41))
+                    {
+                        return nullptr;
+                    }
+                    break;
+                case TYP_LONG:
+                case TYP_ULONG:
+                    if (!compSupports(InstructionSet_SSE41_X64))
+                    {
+                        return nullptr;
+                    }
+                    break;
+                case TYP_DOUBLE:
+                case TYP_FLOAT:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    // short/ushort/float/double is supported by SSE2
+                    break;
+                default:
+                    break;
+            }
+
+            ssize_t imm8  = indexOp->AsIntCon()->IconValue();
+            ssize_t count = simdSize / genTypeSize(baseType);
+
+            if (imm8 >= count || imm8 < 0)
+            {
+                // Using software fallback if index is out of range (the fallback throws the exception)
+                return nullptr;
+            }
+
+            impPopStack();
+            GenTree*       vectorOp     = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd);
+            NamedIntrinsic resIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                assert(compSupports(InstructionSet_AVX));
+                if (imm8 >= count / 2)
+                {
+                    imm8 -= count / 2;
+                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
+                                                        baseType, simdSize);
+                }
+                else
+                {
+                    vectorOp =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize);
+                }
+            }
+
+            if (imm8 == 0 && (genTypeSize(baseType) >= 4))
+            {
+                switch (baseType)
+                {
+                    case TYP_LONG:
+                        resIntrinsic = NI_SSE2_X64_ConvertToInt64;
+                        break;
+                    case TYP_ULONG:
+                        resIntrinsic = NI_SSE2_X64_ConvertToUInt64;
+                        break;
+                    case TYP_INT:
+                        resIntrinsic = NI_SSE2_ConvertToInt32;
+                        break;
+                    case TYP_UINT:
+                        resIntrinsic = NI_SSE2_ConvertToUInt32;
+                        break;
+                    case TYP_FLOAT:
+                    case TYP_DOUBLE:
+                        resIntrinsic = NI_Base_Vector128_ToScalar;
+                        break;
+                    default:
+                        unreached();
+                }
+                return gtNewSimdHWIntrinsicNode(retType, vectorOp, resIntrinsic, baseType, 16);
+            }
+
+            GenTree* immNode = gtNewIconNode(imm8);
+
+            switch (baseType)
+            {
+                case TYP_LONG:
+                case TYP_ULONG:
+                    retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_X64_Extract, baseType, 16);
+                    break;
+
+                case TYP_FLOAT:
+                {
+                    if (!compSupports(InstructionSet_SSE41))
+                    {
+                        assert(imm8 >= 1);
+                        assert(imm8 <= 3);
+                        // Emulate Vector128<float>.GetElement(i) by SSE instructions
+                        // vector.GetElement(i)
+                        // =>
+                        // shufps  xmm0, xmm0, control
+                        // (xmm0 = vector, control = i + 228)
+                        immNode->AsIntCon()->SetIconValue(228 + imm8);
+                        GenTree* clonedVectorOp = nullptr;
+                        vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                                nullptr DEBUGARG("Clone Vector for Vector128<float>.GetElement"));
+                        vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, clonedVectorOp, immNode,
+                                                            NI_SSE_Shuffle, TYP_FLOAT, 16);
+                        return gtNewSimdHWIntrinsicNode(retType, vectorOp, NI_Base_Vector128_ToScalar, TYP_FLOAT, 16);
+                    }
+                    __fallthrough;
+                }
+                case TYP_UBYTE:
+                case TYP_INT:
+                case TYP_UINT:
+                    retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_Extract, baseType, 16);
+                    break;
+
+                case TYP_BYTE:
+                    // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result
+                    retNode = gtNewSimdHWIntrinsicNode(TYP_UBYTE, vectorOp, immNode, NI_SSE41_Extract, TYP_UBYTE, 16);
+                    retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_BYTE);
+                    break;
+
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result
+                    retNode = gtNewSimdHWIntrinsicNode(TYP_USHORT, vectorOp, immNode, NI_SSE2_Extract, TYP_USHORT, 16);
+                    if (baseType == TYP_SHORT)
+                    {
+                        retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_SHORT);
+                    }
+                    break;
+
+                case TYP_DOUBLE:
+                    assert(imm8 == 1);
+                    // vector.GetElement(1)
+                    // =>
+                    // pshufd xmm1, xmm0, 0xEE (xmm0 = vector)
+                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(0xEE), NI_SSE2_Shuffle,
+                                                        TYP_INT, 16);
+                    retNode =
+                        gtNewSimdHWIntrinsicNode(TYP_DOUBLE, vectorOp, NI_Base_Vector128_ToScalar, TYP_DOUBLE, 16);
+                    break;
+
+                default:
+                    unreached();
+            }
+
+            break;
+        }
+
 #endif // _TARGET_XARCH_
 
         default:
@@ -4683,6 +5088,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                             }
                         }
 #if defined(_TARGET_XARCH_)
+                        else if (strcmp(methodName, "GetElement") == 0)
+                        {
+                            result = NI_Base_Vector128_GetElement;
+                        }
+                        else if (strcmp(methodName, "WithElement") == 0)
+                        {
+                            result = NI_Base_Vector128_WithElement;
+                        }
                         else if (strcmp(methodName, "get_Zero") == 0)
                         {
                             result = NI_Base_Vector128_Zero;
@@ -4783,6 +5196,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                         {
                             result = NI_Base_Vector256_GetLower;
                         }
+                        else if (strcmp(methodName, "GetElement") == 0)
+                        {
+                            result = NI_Base_Vector256_GetElement;
+                        }
+                        else if (strcmp(methodName, "WithElement") == 0)
+                        {
+                            result = NI_Base_Vector256_WithElement;
+                        }
                         else if (strcmp(methodName, "ToScalar") == 0)
                         {
                             result = NI_Base_Vector256_ToScalar;
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957.cs b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957.cs
new file mode 100644 (file)
index 0000000..1e65f75
--- /dev/null
@@ -0,0 +1,208 @@
+using System;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace GitHub_17957
+{
+    /// <summary>
+    /// Regression test that round-trips values through <c>WithElement</c> and <c>GetElement</c>
+    /// on <see cref="Vector128{T}" /> and <see cref="Vector256{T}" />.
+    /// </summary>
+    /// <remarks>
+    /// NOTE(review): every index is written as a literal at its call site. Presumably the JIT
+    /// expansion this test targets depends on the index being a constant, so confirm that before
+    /// folding the repeated checks into a helper that takes the index as a parameter.
+    /// </remarks>
+    class Program
+    {
+        // Exit codes returned by Main.
+        const int Pass = 100;
+        const int Fail = 0;
+
+        /// <summary>Runs both test groups and maps the combined result to an exit code.</summary>
+        /// <param name="args">Unused.</param>
+        /// <returns><see cref="Pass" /> when both groups succeed; otherwise <see cref="Fail" />.</returns>
+        static int Main(string[] args)
+        {
+            // Short-circuit: Test256 only runs when Test128 succeeded.
+            return (Test128() && Test256()) ? Pass : Fail;
+        }
+
+        /// <summary>
+        /// Starting from <c>Zero</c>, writes a non-zero value into selected elements of
+        /// <see cref="Vector128{T}" /> and reads each one back right after it was written.
+        /// </summary>
+        /// <returns><c>true</c> when every value read back equals the value written; otherwise <c>false</c>.</returns>
+        public static bool Test128()
+        {
+            // 8 x short: first, middle and last element.
+            Vector128<short> vs = Vector128<short>.Zero;
+            vs = vs.WithElement(0, -1);
+            if (vs.GetElement(0) != -1)
+            {
+                return false;
+            }
+
+            vs = vs.WithElement(3, -1);
+            if (vs.GetElement(3) != -1)
+            {
+                return false;
+            }
+
+            vs = vs.WithElement(7, -1);
+            if (vs.GetElement(7) != -1)
+            {
+                return false;
+            }
+
+            // 8 x ushort: first, middle and last element.
+            Vector128<ushort> vus = Vector128<ushort>.Zero;
+            vus = vus.WithElement(0, ushort.MaxValue);
+            if (vus.GetElement(0) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            vus = vus.WithElement(3, ushort.MaxValue);
+            if (vus.GetElement(3) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            vus = vus.WithElement(7, ushort.MaxValue);
+            if (vus.GetElement(7) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            // 16 x sbyte: first, middle and last element.
+            Vector128<sbyte> vsb = Vector128<sbyte>.Zero;
+            vsb = vsb.WithElement(0, -1);
+            if (vsb.GetElement(0) != -1)
+            {
+                return false;
+            }
+
+            vsb = vsb.WithElement(7, -1);
+            if (vsb.GetElement(7) != -1)
+            {
+                return false;
+            }
+
+            vsb = vsb.WithElement(15, -1);
+            if (vsb.GetElement(15) != -1)
+            {
+                return false;
+            }
+
+            // 16 x byte: first, middle and last element.
+            Vector128<byte> vb = Vector128<byte>.Zero;
+            vb = vb.WithElement(0, byte.MaxValue);
+            if (vb.GetElement(0) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            vb = vb.WithElement(7, byte.MaxValue);
+            if (vb.GetElement(7) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            vb = vb.WithElement(15, byte.MaxValue);
+            if (vb.GetElement(15) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            // 4 x float: every element. -1.0f is exactly representable, so == / != is safe here.
+            Vector128<float> vf = Vector128<float>.Zero;
+            vf = vf.WithElement(0, -1.0f);
+            if (vf.GetElement(0) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(1, -1f);
+            if (vf.GetElement(1) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(2, -1f);
+            if (vf.GetElement(2) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(3, -1.0f);
+            if (vf.GetElement(3) != -1.0f)
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Starting from <c>Zero</c>, writes a non-zero value into selected elements of
+        /// <see cref="Vector256{T}" /> and reads each one back right after it was written.
+        /// Each element type is exercised in both the lower and the upper 128 bits.
+        /// </summary>
+        /// <returns><c>true</c> when every value read back equals the value written; otherwise <c>false</c>.</returns>
+        public static bool Test256()
+        {
+            // 16 x short: indices 0 and 3 are in the lower 128 bits, 9 is in the upper 128 bits.
+            Vector256<short> vs = Vector256<short>.Zero;
+            vs = vs.WithElement(0, -1);
+            if (vs.GetElement(0) != -1)
+            {
+                return false;
+            }
+
+            vs = vs.WithElement(3, -1);
+            if (vs.GetElement(3) != -1)
+            {
+                return false;
+            }
+
+            vs = vs.WithElement(9, -1);
+            if (vs.GetElement(9) != -1)
+            {
+                return false;
+            }
+
+            // 16 x ushort: index 8 is the first element of the upper 128 bits.
+            Vector256<ushort> vus = Vector256<ushort>.Zero;
+            vus = vus.WithElement(0, ushort.MaxValue);
+            if (vus.GetElement(0) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            vus = vus.WithElement(3, ushort.MaxValue);
+            if (vus.GetElement(3) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            vus = vus.WithElement(8, ushort.MaxValue);
+            if (vus.GetElement(8) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            // 32 x sbyte: index 16 is the first element of the upper 128 bits.
+            Vector256<sbyte> vsb = Vector256<sbyte>.Zero;
+            vsb = vsb.WithElement(0, -1);
+            if (vsb.GetElement(0) != -1)
+            {
+                return false;
+            }
+
+            vsb = vsb.WithElement(7, -1);
+            if (vsb.GetElement(7) != -1)
+            {
+                return false;
+            }
+
+            vsb = vsb.WithElement(16, -1);
+            if (vsb.GetElement(16) != -1)
+            {
+                return false;
+            }
+
+            // 32 x byte: index 17 is in the upper 128 bits.
+            Vector256<byte> vb = Vector256<byte>.Zero;
+            vb = vb.WithElement(0, byte.MaxValue);
+            if (vb.GetElement(0) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            vb = vb.WithElement(7, byte.MaxValue);
+            if (vb.GetElement(7) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            vb = vb.WithElement(17, byte.MaxValue);
+            if (vb.GetElement(17) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            // 8 x float, mirroring the float coverage in Test128: first and last element of the
+            // lower 128 bits (0, 3) and of the upper 128 bits (4, 7).
+            Vector256<float> vf = Vector256<float>.Zero;
+            vf = vf.WithElement(0, -1.0f);
+            if (vf.GetElement(0) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(3, -1.0f);
+            if (vf.GetElement(3) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(4, -1.0f);
+            if (vf.GetElement(4) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(7, -1.0f);
+            if (vf.GetElement(7) != -1.0f)
+            {
+                return false;
+            }
+
+            return true;
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_r.csproj
new file mode 100644 (file)
index 0000000..ae0490a
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> <!-- NOTE(review): identical to the ProjectGuid in GitHub_17957_ro.csproj; confirm whether it needs to be unique -->
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize></Optimize> <!-- Left empty in this project; GitHub_17957_ro.csproj builds the same GitHub_17957.cs with Optimize set to True -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="GitHub_17957.cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_ro.csproj
new file mode 100644 (file)
index 0000000..3995aa7
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> <!-- NOTE(review): identical to the ProjectGuid in GitHub_17957_r.csproj; confirm whether it needs to be unique -->
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize> <!-- Optimize is True here; GitHub_17957_r.csproj builds the same GitHub_17957.cs with Optimize left empty -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="GitHub_17957.cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>