From aa0204c3dbee02968536cf227c67eb897a9a86d8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 24 Mar 2020 09:28:08 -0700 Subject: [PATCH] Update lowerxarch to handle some hwintrinsics that were missed (#33983) * Update lowerxarch to handle some hwintrinsics that were missed * Don't mark ShiftLeftLogical128BitLane or ShiftRightLogical128BitLane as NoContainment, as they have a containable immediate --- src/coreclr/src/jit/hwintrinsiclistxarch.h | 8 ++++---- src/coreclr/src/jit/lowerxarch.cpp | 21 ++++++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index de3add9..33360e9 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -45,8 +45,8 @@ HARDWARE_INTRINSIC(Vector128_AsVector4, "AsVector4", HARDWARE_INTRINSIC(Vector128_AsVector128, "AsVector128", Vector128, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128_Count, "get_Count", Vector128, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128_CreateScalarUnsafe, "CreateScalarUnsafe", Vector128, -1, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128_GetElement, "GetElement", Vector128, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128_WithElement, "WithElement", Vector128, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128_GetElement, "GetElement", Vector128, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128_WithElement, "WithElement", Vector128, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128_ToScalar, "ToScalar", Vector128, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128_ToVector256, "ToVector256", Vector128, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128_ToVector256Unsafe, "ToVector256Unsafe", Vector128, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -72,8 +72,8 @@ HARDWARE_INTRINSIC(Vector256_AsVector, "AsVector", HARDWARE_INTRINSIC(Vector256_AsVector256, "AsVector256", Vector256, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256_Count, "get_Count", Vector256, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256_CreateScalarUnsafe, "CreateScalarUnsafe", Vector256, -1, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector256_GetElement, "GetElement", Vector256, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256_WithElement, "WithElement", Vector256, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256_GetElement, "GetElement", Vector256, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) 
+HARDWARE_INTRINSIC(Vector256_WithElement, "WithElement", Vector256, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256_GetLower, "GetLower", Vector256, -1, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256_ToScalar, "ToScalar", Vector256, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256_Zero, "get_Zero", Vector256, -1, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 3962f0b..92e9965 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -3047,7 +3047,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { if (varTypeIsIntegral(baseType)) { - // These intrinsics are "ins reg/mem, xmm" and don't + // TODO-XARCH-CQ: These intrinsics are "ins reg/mem, xmm" and don't // currently support containment. 
return; } @@ -3165,6 +3165,17 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { + case NI_SSE2_Extract: + case NI_SSE41_Extract: + case NI_SSE41_X64_Extract: + case NI_AVX_ExtractVector128: + case NI_AVX2_ExtractVector128: + { + // TODO-XARCH-CQ: These intrinsics are "ins reg/mem, xmm, imm8" and don't + // currently support containment. + break; + } + case NI_SSE2_ShiftLeftLogical: case NI_SSE2_ShiftRightArithmetic: case NI_SSE2_ShiftRightLogical: @@ -3192,6 +3203,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_ShuffleHigh: case NI_SSE2_ShuffleLow: case NI_AVX2_Permute4x64: + case NI_AVX2_Shuffle: + case NI_AVX2_ShuffleHigh: + case NI_AVX2_ShuffleLow: { // These intrinsics have op2 as an imm and op1 as a reg/mem @@ -3248,6 +3262,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) default: { + assert(!"Unhandled containment for binary hardware intrinsic with immediate operand"); break; } } @@ -3422,9 +3437,12 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX_Compare: case NI_AVX_CompareScalar: case NI_AVX_DotProduct: + case NI_AVX_InsertVector128: case NI_AVX_Permute2x128: case NI_AVX_Shuffle: + case NI_AVX2_AlignRight: case NI_AVX2_Blend: + case NI_AVX2_InsertVector128: case NI_AVX2_MultipleSumAbsoluteDifferences: case NI_AVX2_Permute2x128: case NI_PCLMULQDQ_CarrylessMultiply: @@ -3442,6 +3460,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) default: { + assert(!"Unhandled containment for ternary hardware intrinsic with immediate operand"); break; } } -- 2.7.4