From 450122b4dce44cca64a186db2d8ecc6454e34fa5 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 17 Jun 2018 08:58:20 -0700 Subject: [PATCH] Fold away Sse.StaticCast and Avx.StaticCast in the importer Commit migrated from https://github.com/dotnet/coreclr/commit/94edffc61a1cd7bf61fad5a4910d7cfb786b6f24 --- src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp | 2 -- src/coreclr/src/jit/hwintrinsiclistxarch.h | 4 ++-- src/coreclr/src/jit/hwintrinsicxarch.cpp | 24 ++++++++++++++++++++++++ src/coreclr/src/jit/lsraxarch.cpp | 2 -- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index db493a9..0a03df2 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -1337,7 +1337,6 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) } case NI_SSE_ConvertToSingle: - case NI_SSE_StaticCast: { assert(op2 == nullptr); if (op1Reg != targetReg) @@ -1985,7 +1984,6 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) } case NI_AVX_GetLowerHalf: - case NI_AVX_StaticCast: { assert(op2 == nullptr); regNumber op1Reg = op1->gtRegNum; diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 3f2f08b..84c5e87 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -106,7 +106,7 @@ HARDWARE_INTRINSIC(SSE_SetZeroVector128, "SetZeroVect HARDWARE_INTRINSIC(SSE_Shuffle, "Shuffle", SSE, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE_Sqrt, "Sqrt", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_SqrtScalar, "SqrtScalar", SSE, -1, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE_StaticCast, "StaticCast", SSE, -1, 16, 1, {INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_TwoTypeGeneric|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_StaticCast, "StaticCast", SSE, -1, 16, 1, {INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_TwoTypeGeneric) HARDWARE_INTRINSIC(SSE_Store, "Store", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_StoreAligned, "StoreAligned", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) @@ -387,7 +387,7 @@ HARDWARE_INTRINSIC(AVX_SetAllVector256, "SetAllVecto HARDWARE_INTRINSIC(AVX_SetZeroVector256, "SetZeroVector256", AVX, -1, 32, 0, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_xorps, INS_xorpd}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_Shuffle, "Shuffle", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_NoRMWSemantics|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX_Sqrt, "Sqrt", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(AVX_StaticCast, "StaticCast", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_TwoTypeGeneric|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX_StaticCast, "StaticCast", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_TwoTypeGeneric) HARDWARE_INTRINSIC(AVX_Store, "Store", AVX, -1, 32, 2, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_StoreAligned, "StoreAligned", AVX, -1, 32, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", AVX, -1, 32, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) diff --git a/src/coreclr/src/jit/hwintrinsicxarch.cpp b/src/coreclr/src/jit/hwintrinsicxarch.cpp index 5cc91d7..850ff37 100644 --- a/src/coreclr/src/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsicxarch.cpp @@ -905,6 +905,18 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_SSE_StaticCast: + { + // We fold away the static cast here, as it only exists to satisfy + // the type system. It is safe to do this here since the retNode type + // and the signature return type are both TYP_SIMD16. + assert(sig->numArgs == 1); + retNode = impSIMDPopStack(TYP_SIMD16); + SetOpLclRelatedToSIMDIntrinsic(retNode); + assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + break; + } + case NI_SSE_StoreFence: assert(sig->numArgs == 0); assert(JITtype2varType(sig->retType) == TYP_VOID); @@ -1214,6 +1226,18 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic, break; } + case NI_AVX_StaticCast: + { + // We fold away the static cast here, as it only exists to satisfy + // the type system. It is safe to do this here since the retNode type + // and the signature return type are both TYP_SIMD32. + assert(sig->numArgs == 1); + retNode = impSIMDPopStack(TYP_SIMD32); + SetOpLclRelatedToSIMDIntrinsic(retNode); + assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + break; + } + case NI_AVX_ExtractVector128: case NI_AVX2_ExtractVector128: { diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 5383efa..089e80f 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -2384,11 +2384,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } case NI_SSE_ConvertToSingle: - case NI_SSE_StaticCast: case NI_SSE2_ConvertToDouble: case NI_AVX_ExtendToVector256: case NI_AVX_GetLowerHalf: - case NI_AVX_StaticCast: { assert(numArgs == 1); assert(!isRMW); -- 2.7.4