From 797ced585277115448e8466cd33a6d633608bc88 Mon Sep 17 00:00:00 2001
From: Fei Peng
Date: Wed, 24 Oct 2018 12:27:31 -0700
Subject: [PATCH] Implement AVX2.BroadcastScalarToVector128/256

---
 src/jit/gentree.cpp                 | 72 ++++++++++++++++++++++---------------
 src/jit/hwintrinsiccodegenxarch.cpp |  2 +-
 src/jit/hwintrinsiclistxarch.h      | 20 +++++------
 src/jit/hwintrinsicxarch.cpp        |  2 +-
 src/jit/hwintrinsicxarch.h          | 30 ++++++++++------
 5 files changed, 74 insertions(+), 52 deletions(-)

diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 37feba6..2e73857 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -5274,7 +5274,7 @@ bool GenTree::OperMayThrow(Compiler* comp)
         {
             GenTreeHWIntrinsic* hwIntrinsicNode = this->AsHWIntrinsic();
             assert(hwIntrinsicNode != nullptr);
-            if (hwIntrinsicNode->OperIsMemoryStore() || hwIntrinsicNode->OperIsMemoryLoad())
+            if (hwIntrinsicNode->OperIsMemoryLoadOrStore())
             {
                 // This operation contains an implicit indirection
                 //     it could throw a null reference exception.
@@ -17355,29 +17355,47 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad()
     {
         return true;
     }
-    else if (category == HW_Category_IMM)
+    else if (HWIntrinsicInfo::MaybeMemoryLoad(gtHWIntrinsicId))
     {
-        // Some AVX instructions here also have MemoryLoad sematics
-
-        // Do we have less than 3 operands?
-        if (HWIntrinsicInfo::lookupNumArgs(this) < 3)
+        // Some AVX intrinsics (without HW_Category_MemoryLoad) also have MemoryLoad semantics
+        if (category == HW_Category_SIMDScalar)
         {
-            return false;
+            // Avx2.BroadcastScalarToVector128/256 have both vector and pointer overloads, e.g.,
+            // Vector128<byte> BroadcastScalarToVector128(Vector128<byte> value)
+            // Vector128<byte> BroadcastScalarToVector128(byte* source)
+            // So, we need to check whether the argument is a memory reference (TYP_I_IMPL) or not
+            assert(HWIntrinsicInfo::lookupNumArgs(this) == 1);
+            return (gtHWIntrinsicId == NI_AVX2_BroadcastScalarToVector128 ||
+                    gtHWIntrinsicId == NI_AVX2_BroadcastScalarToVector256) &&
+                   gtOp.gtOp1->TypeGet() == TYP_I_IMPL;
         }
-        else // We have 3 or more operands/args
+        else if (category == HW_Category_IMM)
         {
-            if (HWIntrinsicInfo::isAVX2GatherIntrinsic(gtHWIntrinsicId))
+            // Do we have less than 3 operands?
+            if (HWIntrinsicInfo::lookupNumArgs(this) < 3)
             {
-                return true;
+                return false;
             }
+            else // We have 3 or more operands/args
+            {
+                // All the Avx2.Gather* are "load" instructions
+                if (HWIntrinsicInfo::isAVX2GatherIntrinsic(gtHWIntrinsicId))
+                {
+                    return true;
+                }

-            GenTreeArgList* argList = gtOp.gtOp1->AsArgList();
+                GenTreeArgList* argList = gtOp.gtOp1->AsArgList();

-            if ((gtHWIntrinsicId == NI_AVX_InsertVector128 || gtHWIntrinsicId == NI_AVX2_InsertVector128) &&
-                (argList->Rest()->Current()->TypeGet() == TYP_I_IMPL)) // Is the type of the second arg TYP_I_IMPL?
-            {
-                // This is Avx/Avx2.InsertVector128
-                return true;
+                // Avx/Avx2.InsertVector128 have both vector and pointer overloads, e.g.,
+                // Vector256<sbyte> InsertVector128(Vector256<sbyte> value, Vector128<sbyte> data, byte index)
+                // Vector256<sbyte> InsertVector128(Vector256<sbyte> value, sbyte* address, byte index)
+                // So, we need to check whether the second argument is a memory reference (TYP_I_IMPL) or not
+                if ((gtHWIntrinsicId == NI_AVX_InsertVector128 || gtHWIntrinsicId == NI_AVX2_InsertVector128) &&
+                    (argList->Rest()->Current()->TypeGet() == TYP_I_IMPL)) // Is the type of the second arg TYP_I_IMPL?
+                {
+                    // This is Avx/Avx2.InsertVector128
+                    return true;
+                }
             }
         }
     }
@@ -17395,22 +17413,18 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore()
     {
         return true;
     }
-    else if (category == HW_Category_IMM)
+    else if (HWIntrinsicInfo::MaybeMemoryStore(gtHWIntrinsicId) && category == HW_Category_IMM)
     {
-        // Some AVX instructions here also have MemoryStore sematics
+        // Some AVX intrinsics (without HW_Category_MemoryStore) also have MemoryStore semantics

-        // Do we have 3 operands?
-        if (HWIntrinsicInfo::lookupNumArgs(this) != 3)
-        {
-            return false;
-        }
-        else // We have 3 operands/args
+        // Avx/Avx2.ExtractVector128 have both vector and pointer overloads, e.g.,
+        // Vector128<sbyte> ExtractVector128(Vector256<sbyte> value, byte index)
+        // void ExtractVector128(sbyte* address, Vector256<sbyte> value, byte index)
+        // So, only the 3-argument form has MemoryStore semantics
+        if ((HWIntrinsicInfo::lookupNumArgs(this) == 3) &&
+            (gtHWIntrinsicId == NI_AVX_ExtractVector128 || gtHWIntrinsicId == NI_AVX2_ExtractVector128))
         {
-            if ((gtHWIntrinsicId == NI_AVX_ExtractVector128 || gtHWIntrinsicId == NI_AVX2_ExtractVector128))
-            {
-                // This is Avx/Avx2.ExtractVector128
-                return true;
-            }
+            return true;
         }
     }
 #endif // _TARGET_XARCH_
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index b4c3910..3e50f66 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -112,7 +112,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 genConsumeOperands(node);
                 op1Reg = op1->gtRegNum;

-                if (category == HW_Category_MemoryLoad)
+                if (node->OperIsMemoryLoad())
                 {
                     emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
                 }
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 4fe5345..4052e4b 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -355,13 +355,13 @@ HARDWARE_INTRINSIC(AVX_DuplicateEvenIndexed, "DuplicateEv
 HARDWARE_INTRINSIC(AVX_DuplicateOddIndexed, "DuplicateOddIndexed", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_Extract, "Extract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(AVX_ExtendToVector256, "ExtendToVector256", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ExtractVector128, "ExtractVector128", AVX, -1, 32, -1, {INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX_ExtractVector128, "ExtractVector128", AVX, -1, 32, -1, {INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_MaybeMemoryStore|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX_Floor, "Floor", AVX, 9, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_GetLowerHalf, "GetLowerHalf", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_HorizontalAdd, "HorizontalAdd", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_HorizontalSubtract, "HorizontalSubtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_Insert, "Insert", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoCodeGen|HW_Flag_SecondArgMaybe64Bit)
-HARDWARE_INTRINSIC(AVX_InsertVector128, "InsertVector128", AVX, -1, 32, 3, {INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX_InsertVector128, "InsertVector128", AVX, -1, 32, 3, {INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX_LoadAlignedVector256, "LoadAlignedVector256", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_LoadDquVector256, "LoadDquVector256", AVX, -1, 32, 1, {INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_LoadVector256, "LoadVector256", AVX, -1, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
@@ -415,12 +415,12 @@ HARDWARE_INTRINSIC(AVX2_AndNot, "AndNot",
 HARDWARE_INTRINSIC(AVX2_Average, "Average", AVX2, -1, 32, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Blend, "Blend", AVX2, -1, 0, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_vpblendd, INS_vpblendd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_UnfixedSIMDSize|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_BlendVariable, "BlendVariable", AVX2, -1, 32, 3, {INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector128, "BroadcastScalarToVector128", AVX2, -1, 16, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_movddup}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector256, "BroadcastScalarToVector256", AVX2, -1, 32, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector128, "BroadcastScalarToVector128", AVX2, -1, 16, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_movddup}, HW_Category_SIMDScalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryLoad)
+HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector256, "BroadcastScalarToVector256", AVX2, -1, 32, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryLoad)
 HARDWARE_INTRINSIC(AVX2_BroadcastVector128ToVector256, "BroadcastVector128ToVector256", AVX2, -1, 32, 1, {INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(AVX2_CompareEqual, "CompareEqual", AVX2, -1, 32, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_CompareGreaterThan, "CompareGreaterThan", AVX2, -1, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_ExtractVector128, "ExtractVector128", AVX2, -1, 32, -1, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2_ExtractVector128, "ExtractVector128", AVX2, -1, 32, -1, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeMemoryStore|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_ConvertToDouble, "ConvertToDouble", AVX2, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_Helper, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2_ConvertToInt32, "ConvertToInt32", AVX2, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2_ConvertToUInt32, "ConvertToUInt32", AVX2, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
@@ -430,15 +430,15 @@ HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32, "ConvertToVe
 HARDWARE_INTRINSIC(AVX2_ConvertToVector256UInt32, "ConvertToVector256UInt32", AVX2, -1, 32, 1, {INS_invalid, INS_pmovzxbd, INS_invalid, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64, "ConvertToVector256Int64", AVX2, -1, 32, 1, {INS_pmovsxbq, INS_invalid, INS_pmovsxwq, INS_invalid, INS_pmovsxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2_ConvertToVector256UInt64, "ConvertToVector256UInt64", AVX2, -1, 32, 1, {INS_invalid, INS_pmovzxbq, INS_invalid, INS_pmovzxwq, INS_invalid, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(AVX2_HorizontalAdd, "HorizontalAdd", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_invalid, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate, "HorizontalAddSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_HorizontalSubtract, "HorizontalSubtract", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_HorizontalSubtractSaturate, "HorizontalSubtractSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_InsertVector128, "InsertVector128", AVX2, -1, 32, 3, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AVX2_InsertVector128, "InsertVector128", AVX2, -1, 32, 3, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal, "LoadAlignedVector256NonTemporal", AVX2, -1, 32, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(AVX2_MaskLoad, "MaskLoad", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize)
 HARDWARE_INTRINSIC(AVX2_MaskStore, "MaskStore", AVX2, -1, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 3948273..9f8a67e 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -443,7 +443,6 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
     switch (isa)
     {
         // These ISAs are partially implemented
-        case InstructionSet_AVX2:
         case InstructionSet_BMI1:
         case InstructionSet_BMI2:
         case InstructionSet_SSE42:
@@ -454,6 +453,7 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
         // These ISAs are fully implemented
         case InstructionSet_AES:
         case InstructionSet_AVX:
+        case InstructionSet_AVX2:
         case InstructionSet_FMA:
        case InstructionSet_LZCNT:
         case InstructionSet_PCLMULQDQ:
diff --git a/src/jit/hwintrinsicxarch.h b/src/jit/hwintrinsicxarch.h
index d74709a..3b4634b 100644
--- a/src/jit/hwintrinsicxarch.h
+++ b/src/jit/hwintrinsicxarch.h
@@ -72,16 +72,13 @@ enum HWIntrinsicFlag : unsigned int
     // - overloaded on multiple vector sizes (SIMD size in the table is unreliable)
     HW_Flag_UnfixedSIMDSize = 0x20,

-    // Complex overload
-    // - the codegen of overloads cannot be determined by intrinsicID and base type
-    HW_Flag_ComplexOverloads = 0x40,
-
     // Multi-instruction
     // - that one intrinsic can generate multiple instructions
     HW_Flag_MultiIns = 0x80,

     // NoContainment
-    // the intrinsic cannot be contained
+    // the intrinsic cannot be handled by containment;
+    // all intrinsics that have explicit memory load/store semantics should have this flag
     HW_Flag_NoContainment = 0x100,

     // Copy Upper bits
@@ -123,6 +120,11 @@ enum HWIntrinsicFlag : unsigned int
     // the intrinsics need special rules in importer,
     // but may be table-driven in the back-end
     HW_Flag_SpecialImport = 0x80000,
+
+    // Maybe Memory Load/Store
+    // - some intrinsics have pointer overloads but are not in HW_Category_MemoryLoad/HW_Category_MemoryStore
+    HW_Flag_MaybeMemoryLoad = 0x100000,
+    HW_Flag_MaybeMemoryStore = 0x200000,
 };

 struct HWIntrinsicInfo
@@ -241,12 +243,6 @@ struct HWIntrinsicInfo
         return (flags & HW_Flag_UnfixedSIMDSize) == 0;
     }

-    static const bool HasComplexOverloads(NamedIntrinsic id)
-    {
-        HWIntrinsicFlag flags = lookupFlags(id);
-        return (flags & HW_Flag_ComplexOverloads) != 0;
-    }
-
     static const bool GeneratesMultipleIns(NamedIntrinsic id)
     {
         HWIntrinsicFlag flags = lookupFlags(id);
@@ -283,6 +279,18 @@ struct HWIntrinsicInfo
         return (flags & HW_Flag_MaybeIMM) != 0;
     }

+    static const bool MaybeMemoryLoad(NamedIntrinsic id)
+    {
+        HWIntrinsicFlag flags = lookupFlags(id);
+        return (flags & HW_Flag_MaybeMemoryLoad) != 0;
+    }
+
+    static const bool MaybeMemoryStore(NamedIntrinsic id)
+    {
+        HWIntrinsicFlag flags = lookupFlags(id);
+        return (flags & HW_Flag_MaybeMemoryStore) != 0;
+    }
+
     static const bool NoJmpTableImm(NamedIntrinsic id)
     {
         HWIntrinsicFlag flags = lookupFlags(id);
-- 
2.7.4
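
Note for reviewers (not part of the patch): a minimal C# sketch of the two overload shapes that the new HW_Flag_MaybeMemoryLoad handling has to distinguish; the class and method names below are hypothetical, only the Avx2 API calls are real.

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.X86;

    static unsafe class BroadcastScalarExample
    {
        // Vector overload: the operand is a SIMD value, so GenTreeHWIntrinsic::OperIsMemoryLoad()
        // keeps returning false for this node.
        public static Vector256<int> FromVector(Vector128<int> value)
            => Avx2.BroadcastScalarToVector256(value);

        // Pointer overload: the operand imports as TYP_I_IMPL, so the JIT now treats the node as a
        // memory load (an implicit indirection that could throw a null reference exception).
        public static Vector256<int> FromPointer(int* source)
            => Avx2.BroadcastScalarToVector256(source);
    }

The same distinction drives Avx/Avx2.InsertVector128 (HW_Flag_MaybeMemoryLoad) and Avx/Avx2.ExtractVector128 (HW_Flag_MaybeMemoryStore), where only the pointer overloads have memory semantics.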