From d466bdbd47ab7a861046b99340e16e037098fb2a Mon Sep 17 00:00:00 2001 From: Fei Peng Date: Tue, 19 Feb 2019 07:50:15 -0800 Subject: [PATCH] Optimize Vector128/256.Get/WithElement (#22353) --- .../System/Runtime/Intrinsics/Vector128_1.cs | 2 + .../System/Runtime/Intrinsics/Vector256_1.cs | 2 + src/jit/hwintrinsiclistxarch.h | 4 + src/jit/importer.cpp | 421 +++++++++++++++++++++ .../X86/Regression/GitHub_17957/GitHub_17957.cs | 208 ++++++++++ .../Regression/GitHub_17957/GitHub_17957_r.csproj | 34 ++ .../Regression/GitHub_17957/GitHub_17957_ro.csproj | 34 ++ 7 files changed, 705 insertions(+) create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957.cs create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_r.csproj create mode 100644 tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_ro.csproj diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs index 7060504..80da2c4 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs @@ -232,6 +232,7 @@ namespace System.Runtime.Intrinsics /// The value of the element at . /// The type of the current instance () is not supported. /// was less than zero or greater than the number of elements. + [Intrinsic] public T GetElement(int index) { ThrowHelper.ThrowForUnsupportedVectorBaseType(); @@ -251,6 +252,7 @@ namespace System.Runtime.Intrinsics /// A with the value of the element at set to and the remaining elements set to the same value as that in the current instance. /// The type of the current instance () is not supported. /// was less than zero or greater than the number of elements. 
+ [Intrinsic] public Vector128 WithElement(int index, T value) { ThrowHelper.ThrowForUnsupportedVectorBaseType(); diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs index 61191cc..968faf1 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs @@ -233,6 +233,7 @@ namespace System.Runtime.Intrinsics /// The value of the element at . /// The type of the current instance () is not supported. /// was less than zero or greater than the number of elements. + [Intrinsic] public T GetElement(int index) { ThrowHelper.ThrowForUnsupportedVectorBaseType(); @@ -252,6 +253,7 @@ namespace System.Runtime.Intrinsics /// A with the value of the element at set to and the remaining elements set to the same value as that in the current instance. /// The type of the current instance () is not supported. /// was less than zero or greater than the number of elements. 
+ [Intrinsic] public Vector256 WithElement(int index, T value) { ThrowHelper.ThrowForUnsupportedVectorBaseType(); diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 2cd82a5..8e02aab 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -39,6 +39,8 @@ HARDWARE_INTRINSIC(Base_Vector128_AsUInt16, "AsUInt16", HARDWARE_INTRINSIC(Base_Vector128_AsUInt32, "AsUInt32", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Base_Vector128_AsUInt64, "AsUInt64", Base, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Base_Vector128_CreateScalarUnsafe, "CreateScalarUnsafe", Base, -1, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector128_GetElement, "GetElement", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Base_Vector128_WithElement, "WithElement", Base, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Base_Vector128_ToScalar, "ToScalar", Base, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Base_Vector128_ToVector256, "ToVector256", Base, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Base_Vector128_ToVector256Unsafe, "ToVector256Unsafe", Base, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) @@ -55,6 +57,8 @@ HARDWARE_INTRINSIC(Base_Vector256_AsUInt16, "AsUInt16", HARDWARE_INTRINSIC(Base_Vector256_AsUInt32, "AsUInt32", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Base_Vector256_AsUInt64, "AsUInt64", Base, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Base_Vector256_CreateScalarUnsafe, "CreateScalarUnsafe", Base, -1, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Base_Vector256_GetElement, "GetElement", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Base_Vector256_WithElement, "WithElement", Base, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Base_Vector256_GetLower, "GetLower", Base, -1, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_ToScalar, "ToScalar", Base, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Base_Vector256_Zero, "get_Zero", Base, -1, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index 477b124..e617e7e 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -3482,6 +3482,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
             case NI_Base_Vector128_AsUInt64:
 #if defined(_TARGET_XARCH_)
             case NI_Base_Vector128_CreateScalarUnsafe:
+            case NI_Base_Vector128_GetElement:
+            case NI_Base_Vector128_WithElement:
             case NI_Base_Vector128_ToScalar:
             case NI_Base_Vector128_ToVector256:
             case NI_Base_Vector128_ToVector256Unsafe:
@@ -3498,6 +3500,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
             case NI_Base_Vector256_AsUInt32:
             case NI_Base_Vector256_AsUInt64:
             case NI_Base_Vector256_CreateScalarUnsafe:
+            case NI_Base_Vector256_GetElement:
+            case NI_Base_Vector256_WithElement:
             case NI_Base_Vector256_GetLower:
             case NI_Base_Vector256_ToScalar:
             case NI_Base_Vector256_Zero:
@@ -4390,6 +4394,407 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
             }
             break;
         }
+
+        case NI_Base_Vector256_WithElement:
+        {
+            if (!compSupports(InstructionSet_AVX))
+            {
+                // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers
+                return nullptr;
+            }
+            __fallthrough;
+        }
+        case NI_Base_Vector128_WithElement:
+        {
+            assert(sig->numArgs == 2);
+            GenTree* indexOp = impStackTop(1).val;
+            if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst())
+            {
+                // Using software fallback if
+                // 1. JIT/hardware don't support SSE2 instructions
+                // 2. baseType is not a numeric type (throw exceptions)
+                // 3. index is not a constant
+                return nullptr;
+            }
+
+            switch (baseType)
+            {
+                // Using software fallback if baseType is not supported by hardware
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                case TYP_INT:
+                case TYP_UINT:
+                    if (!compSupports(InstructionSet_SSE41))
+                    {
+                        return nullptr;
+                    }
+                    break;
+                case TYP_LONG:
+                case TYP_ULONG:
+                    if (!compSupports(InstructionSet_SSE41_X64))
+                    {
+                        return nullptr;
+                    }
+                    break;
+                case TYP_DOUBLE:
+                case TYP_FLOAT:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    // short/ushort/float/double is supported by SSE2
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            ssize_t imm8       = indexOp->AsIntCon()->IconValue();
+            ssize_t cachedImm8 = imm8;
+            ssize_t count      = simdSize / genTypeSize(baseType);
+
+            if (imm8 >= count || imm8 < 0)
+            {
+                // Using software fallback if index is out of range (throw exception)
+                return nullptr;
+            }
+
+            GenTree* valueOp = impPopStack().val;
+            impPopStack();
+            GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd);
+
+            GenTree* clonedVectorOp = nullptr;
+
+            if (simdSize == 32)
+            {
+                // Extract the half vector that will be modified
+                assert(compSupports(InstructionSet_AVX));
+
+                // copy `vectorOp` to accept the modified half vector
+                vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                        nullptr DEBUGARG("Clone Vector for Vector256.WithElement"));
+
+                if (imm8 >= count / 2)
+                {
+                    imm8 -= count / 2;
+                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
+                                                        baseType, simdSize);
+                }
+                else
+                {
+                    vectorOp =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize);
+                }
+            }
+
+            GenTree* immNode = gtNewIconNode(imm8);
+
+            switch (baseType)
+            {
+                case TYP_LONG:
+                case TYP_ULONG:
+                    retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_X64_Insert,
+                                                       baseType, 16);
+                    break;
+
+                case TYP_FLOAT:
+                {
+                    if (!compSupports(InstructionSet_SSE41))
+                    {
+                        // Emulate Vector128.WithElement by SSE instructions
+                        if (imm8 == 0)
+                        {
+                            // vector.WithElement(0, value)
+                            // =>
+                            // movss xmm0, xmm1 (xmm0 = vector, xmm1 = value)
+                            valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp,
+                                                               NI_Base_Vector128_CreateScalarUnsafe, TYP_FLOAT, 16);
+                            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, NI_SSE_MoveScalar,
+                                                               TYP_FLOAT, 16);
+                        }
+                        else if (imm8 == 1)
+                        {
+                            // vector.WithElement(1, value)
+                            // =>
+                            // shufps xmm1, xmm0, 0 (xmm0 = vector, xmm1 = value)
+                            // shufps xmm1, xmm0, 226
+                            GenTree* tmpOp =
+                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
+                                                         TYP_FLOAT, 16);
+                            GenTree* dupVectorOp = nullptr;
+                            vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                                    nullptr DEBUGARG("Clone Vector for Vector128.WithElement"));
+                            tmpOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, vectorOp, gtNewIconNode(0),
+                                                             NI_SSE_Shuffle, TYP_FLOAT, 16);
+                            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, dupVectorOp, gtNewIconNode(226),
+                                                               NI_SSE_Shuffle, TYP_FLOAT, 16);
+                        }
+                        else
+                        {
+                            ssize_t controlBits1 = 0;
+                            ssize_t controlBits2 = 0;
+                            if (imm8 == 2)
+                            {
+                                controlBits1 = 48;
+                                controlBits2 = 132;
+                            }
+                            else
+                            {
+                                controlBits1 = 32;
+                                controlBits2 = 36;
+                            }
+                            // vector.WithElement(2, value)
+                            // =>
+                            // shufps xmm1, xmm0, 48 (xmm0 = vector, xmm1 = value) -> xmm1 = [value, value, v3, v0]
+                            // shufps xmm0, xmm1, 132                             -> xmm0 = [v0, v1, value, v3]
+                            // (shufps takes result lanes 0-1 from its first operand and lanes 2-3 from its second)
+                            // vector.WithElement(3, value)
+                            // =>
+                            // shufps xmm1, xmm0, 32 (xmm0 = vector, xmm1 = value) -> xmm1 = [value, value, v2, v0]
+                            // shufps xmm0, xmm1, 36                              -> xmm0 = [v0, v1, v2, value]
+                            GenTree* tmpOp =
+                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
+                                                         TYP_FLOAT, 16);
+                            GenTree* dupVectorOp = nullptr;
+                            vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                                    nullptr DEBUGARG("Clone Vector for Vector128.WithElement"));
+                            valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, vectorOp, gtNewIconNode(controlBits1),
+                                                               NI_SSE_Shuffle, TYP_FLOAT, 16);
+                            retNode =
+                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, dupVectorOp, valueOp, gtNewIconNode(controlBits2),
+                                                         NI_SSE_Shuffle, TYP_FLOAT, 16);
+                        }
+                        break;
+                    }
+                    else
+                    {
+                        valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
+                                                           TYP_FLOAT, 16);
+                        immNode->AsIntCon()->SetIconValue(imm8 * 16);
+                        __fallthrough;
+                    }
+                }
+
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                case TYP_INT:
+                case TYP_UINT:
+                    retNode =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_Insert, baseType, 16);
+                    break;
+
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    retNode =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE2_Insert, baseType, 16);
+                    break;
+
+                case TYP_DOUBLE:
+                {
+                    // vector.WithElement(0, value)
+                    // =>
+                    // movsd xmm0, xmm1 (xmm0 = vector, xmm1 = value)
+                    //
+                    // vector.WithElement(1, value)
+                    // =>
+                    // unpcklpd xmm0, xmm1 (xmm0 = vector, xmm1 = value)
+                    valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
+                                                       TYP_DOUBLE, 16);
+                    NamedIntrinsic in = (imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow;
+                    retNode           = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, in, TYP_DOUBLE, 16);
+                    break;
+                }
+
+                default:
+                    unreached();
+                    break;
+            }
+
+            if (simdSize == 32)
+            {
+                assert(clonedVectorOp);
+                int upperOrLower = (cachedImm8 >= count / 2) ? 1 : 0;
+                retNode = gtNewSimdHWIntrinsicNode(retType, clonedVectorOp, retNode, gtNewIconNode(upperOrLower),
+                                                   NI_AVX_InsertVector128, baseType, simdSize);
+            }
+
+            break;
+        }
+
+        case NI_Base_Vector256_GetElement:
+        {
+            if (!compSupports(InstructionSet_AVX))
+            {
+                // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers
+                return nullptr;
+            }
+            __fallthrough;
+        }
+        case NI_Base_Vector128_GetElement:
+        {
+            assert(sig->numArgs == 1);
+            GenTree* indexOp = impStackTop().val;
+            if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst())
+            {
+                // Using software fallback if
+                // 1. JIT/hardware don't support SSE2 instructions
+                // 2. baseType is not a numeric type (throw exceptions)
+                // 3. index is not a constant
+                return nullptr;
+            }
+
+            switch (baseType)
+            {
+                // Using software fallback if baseType is not supported by hardware
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                case TYP_INT:
+                case TYP_UINT:
+                    if (!compSupports(InstructionSet_SSE41))
+                    {
+                        return nullptr;
+                    }
+                    break;
+                case TYP_LONG:
+                case TYP_ULONG:
+                    if (!compSupports(InstructionSet_SSE41_X64))
+                    {
+                        return nullptr;
+                    }
+                    break;
+                case TYP_DOUBLE:
+                case TYP_FLOAT:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    // short/ushort/float/double is supported by SSE2
+                    break;
+                default:
+                    break;
+            }
+
+            ssize_t imm8  = indexOp->AsIntCon()->IconValue();
+            ssize_t count = simdSize / genTypeSize(baseType);
+
+            if (imm8 >= count || imm8 < 0)
+            {
+                // Using software fallback if index is out of range (throw exception)
+                return nullptr;
+            }
+
+            impPopStack();
+            GenTree*       vectorOp     = impSIMDPopStack(getSIMDTypeForSize(simdSize), true, clsHnd);
+            NamedIntrinsic resIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                assert(compSupports(InstructionSet_AVX));
+                if (imm8 >= count / 2)
+                {
+                    imm8 -= count / 2;
+                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
+                                                        baseType, simdSize);
+                }
+                else
+                {
+                    vectorOp =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize);
+                }
+            }
+
+            if (imm8 == 0 && (genTypeSize(baseType) >= 4))
+            {
+                switch (baseType)
+                {
+                    case TYP_LONG:
+                        resIntrinsic = NI_SSE2_X64_ConvertToInt64;
+                        break;
+                    case TYP_ULONG:
+                        resIntrinsic = NI_SSE2_X64_ConvertToUInt64;
+                        break;
+                    case TYP_INT:
+                        resIntrinsic = NI_SSE2_ConvertToInt32;
+                        break;
+                    case TYP_UINT:
+                        resIntrinsic = NI_SSE2_ConvertToUInt32;
+                        break;
+                    case TYP_FLOAT:
+                    case TYP_DOUBLE:
+                        resIntrinsic = NI_Base_Vector128_ToScalar;
+                        break;
+                    default:
+                        unreached();
+                }
+                return gtNewSimdHWIntrinsicNode(retType, vectorOp, resIntrinsic, baseType, 16);
+            }
+
+            GenTree* immNode = gtNewIconNode(imm8);
+
+            switch (baseType)
+            {
+                case TYP_LONG:
+                case TYP_ULONG:
+                    retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_X64_Extract, baseType, 16);
+                    break;
+
+                case TYP_FLOAT:
+                {
+                    if (!compSupports(InstructionSet_SSE41))
+                    {
+                        assert(imm8 >= 1);
+                        assert(imm8 <= 3);
+                        // Emulate Vector128.GetElement(i) by SSE instructions
+                        // vector.GetElement(i)
+                        // =>
+                        // shufps xmm0, xmm0, control
+                        // (xmm0 = vector, control = i + 228)
+                        immNode->AsIntCon()->SetIconValue(228 + imm8);
+                        GenTree* clonedVectorOp = nullptr;
+                        vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+                                                nullptr DEBUGARG("Clone Vector for Vector128.GetElement"));
+                        vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, clonedVectorOp, immNode,
+                                                            NI_SSE_Shuffle, TYP_FLOAT, 16);
+                        return gtNewSimdHWIntrinsicNode(retType, vectorOp, NI_Base_Vector128_ToScalar, TYP_FLOAT, 16);
+                    }
+                    __fallthrough;
+                }
+                case TYP_UBYTE:
+                case TYP_INT:
+                case TYP_UINT:
+                    retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_Extract, baseType, 16);
+                    break;
+
+                case TYP_BYTE:
+                    // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result
+                    retNode = gtNewSimdHWIntrinsicNode(TYP_UBYTE, vectorOp, immNode, NI_SSE41_Extract, TYP_UBYTE, 16);
+                    retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_BYTE);
+                    break;
+
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result
+                    retNode = gtNewSimdHWIntrinsicNode(TYP_USHORT, vectorOp, immNode, NI_SSE2_Extract, TYP_USHORT, 16);
+                    if (baseType == TYP_SHORT)
+                    {
+                        retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_SHORT);
+                    }
+                    break;
+
+                case TYP_DOUBLE:
+                    assert(imm8 == 1);
+                    // vector.GetElement(1)
+                    // =>
+                    // pshufd xmm1, xmm0, 0xEE (xmm0 = vector)
+                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(0xEE), NI_SSE2_Shuffle,
+                                                        TYP_INT, 16);
+                    retNode =
+                        gtNewSimdHWIntrinsicNode(TYP_DOUBLE, vectorOp, NI_Base_Vector128_ToScalar, TYP_DOUBLE, 16);
+                    break;
+
+                default:
+                    unreached();
+            }
+
+            break;
+        }
+
 #endif // _TARGET_XARCH_
 
         default:
@@ -4683,6 +5088,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
                 }
             }
 #if defined(_TARGET_XARCH_)
+            else if (strcmp(methodName, "GetElement") == 0)
+            {
+                result = NI_Base_Vector128_GetElement;
+            }
+            else if (strcmp(methodName, "WithElement") == 0)
+            {
+                result = NI_Base_Vector128_WithElement;
+            }
             else if (strcmp(methodName, "get_Zero") == 0)
             {
                 result = NI_Base_Vector128_Zero;
@@ -4783,6 +5196,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
             {
                 result = NI_Base_Vector256_GetLower;
             }
+            else if (strcmp(methodName, "GetElement") == 0)
+            {
+                result = NI_Base_Vector256_GetElement;
+            }
+            else if (strcmp(methodName, "WithElement") == 0)
+            {
+                result = NI_Base_Vector256_WithElement;
+            }
             else if (strcmp(methodName, "ToScalar") == 0)
             {
                 result = NI_Base_Vector256_ToScalar;
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957.cs b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957.cs
new file mode 100644
index 0000000..1e65f75
--- /dev/null
+++ 
b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957.cs
@@ -0,0 +1,208 @@
+using System;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace GitHub_17957
+{
+    class Program // Regression test GitHub_17957: WithElement/GetElement round-trips with constant indices
+    {
+        const int Pass = 100; // exit code returned when every check succeeds
+        const int Fail = 0;   // exit code returned when any check fails
+
+        static int Main(string[] args)
+        {
+            return (Test128() && Test256()) ? Pass : Fail;
+        }
+
+        public static bool Test128() // returns false as soon as a written lane does not read back
+        {
+            Vector128<short> vs = Vector128<short>.Zero; // 8 short lanes: indices 0, 3, 7
+            vs = vs.WithElement(0, -1);
+            if (vs.GetElement(0) != -1)
+            {
+                return false;
+            }
+
+            vs = vs.WithElement(3, -1);
+            if (vs.GetElement(3) != -1)
+            {
+                return false;
+            }
+
+            vs = vs.WithElement(7, -1);
+            if (vs.GetElement(7) != -1)
+            {
+                return false;
+            }
+
+
+            Vector128<ushort> vus = Vector128<ushort>.Zero; // 8 ushort lanes: indices 0, 3, 7
+            vus = vus.WithElement(0, ushort.MaxValue);
+            if (vus.GetElement(0) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            vus = vus.WithElement(3, ushort.MaxValue);
+            if (vus.GetElement(3) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            vus = vus.WithElement(7, ushort.MaxValue);
+            if (vus.GetElement(7) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+
+            Vector128<sbyte> vsb = Vector128<sbyte>.Zero; // 16 sbyte lanes: indices 0, 7, 15
+            vsb = vsb.WithElement(0, -1);
+            if (vsb.GetElement(0) != -1)
+            {
+                return false;
+            }
+
+            vsb = vsb.WithElement(7, -1);
+            if (vsb.GetElement(7) != -1)
+            {
+                return false;
+            }
+
+            vsb = vsb.WithElement(15, -1);
+            if (vsb.GetElement(15) != -1)
+            {
+                return false;
+            }
+
+            Vector128<byte> vb = Vector128<byte>.Zero; // 16 byte lanes: indices 0, 7, 15
+            vb = vb.WithElement(0, byte.MaxValue);
+            if (vb.GetElement(0) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            vb = vb.WithElement(7, byte.MaxValue);
+            if (vb.GetElement(7) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            vb = vb.WithElement(15, byte.MaxValue);
+            if (vb.GetElement(15) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            Vector128<float> vf = Vector128<float>.Zero; // 4 float lanes: every index 0..3
+            vf = vf.WithElement(0, -1.0f);
+            if (vf.GetElement(0) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(1, -1f);
+            if (vf.GetElement(1) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(2, -1f);
+            if (vf.GetElement(2) != -1.0f)
+            {
+                return false;
+            }
+
+            vf = vf.WithElement(3, -1.0f);
+            if (vf.GetElement(3) != -1.0f)
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        public static bool Test256() // same round-trip checks; the last index of each group is in the upper 128 bits
+        {
+            Vector256<short> vs = Vector256<short>.Zero; // 16 short lanes: indices 0, 3, 9
+            vs = vs.WithElement(0, -1);
+            if (vs.GetElement(0) != -1)
+            {
+                return false;
+            }
+
+            vs = vs.WithElement(3, -1);
+            if (vs.GetElement(3) != -1)
+            {
+                return false;
+            }
+
+            vs = vs.WithElement(9, -1);
+            if (vs.GetElement(9) != -1)
+            {
+                return false;
+            }
+
+
+            Vector256<ushort> vus = Vector256<ushort>.Zero; // 16 ushort lanes: indices 0, 3, 8
+            vus = vus.WithElement(0, ushort.MaxValue);
+            if (vus.GetElement(0) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            vus = vus.WithElement(3, ushort.MaxValue);
+            if (vus.GetElement(3) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+            vus = vus.WithElement(8, ushort.MaxValue);
+            if (vus.GetElement(8) != ushort.MaxValue)
+            {
+                return false;
+            }
+
+
+            Vector256<sbyte> vsb = Vector256<sbyte>.Zero; // 32 sbyte lanes: indices 0, 7, 16
+            vsb = vsb.WithElement(0, -1);
+            if (vsb.GetElement(0) != -1)
+            {
+                return false;
+            }
+
+            vsb = vsb.WithElement(7, -1);
+            if (vsb.GetElement(7) != -1)
+            {
+                return false;
+            }
+
+            vsb = vsb.WithElement(16, -1);
+            if (vsb.GetElement(16) != -1)
+            {
+                return false;
+            }
+
+            Vector256<byte> vb = Vector256<byte>.Zero; // 32 byte lanes: indices 0, 7, 17
+            vb = vb.WithElement(0, byte.MaxValue);
+            if (vb.GetElement(0) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            vb = vb.WithElement(7, byte.MaxValue);
+            if (vb.GetElement(7) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            vb = vb.WithElement(17, byte.MaxValue);
+            if (vb.GetElement(17) != byte.MaxValue)
+            {
+                return false;
+            }
+
+            return true;
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_r.csproj
new file mode 100644
index 0000000..ae0490a
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_r.csproj
@@ -0,0 +1,34 @@
+
+
+
+
+    Debug
+    AnyCPU
+    2.0
+    {95DFC527-4DC1-495E-97D7-E94EE1F7140D}
+    Exe
+    
{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + Embedded + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_ro.csproj new file mode 100644 index 0000000..3995aa7 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Regression/GitHub_17957/GitHub_17957_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + Embedded + True + + + + + + + + + + -- 2.7.4