Implement the remaining AVX2 intrinsics
author Fei Peng <fei.peng@intel.com>
Tue, 2 Oct 2018 00:13:02 +0000 (17:13 -0700)
committer Fei Peng <fei.peng@intel.com>
Wed, 3 Oct 2018 21:40:56 +0000 (14:40 -0700)
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/instrsxarch.h
src/jit/lowerxarch.cpp

index e8dbc0b..89353c0 100644 (file)
@@ -206,7 +206,7 @@ HARDWARE_INTRINSIC(SSE2_MoveScalar,                                 "MoveScalar"
 HARDWARE_INTRINSIC(SSE2_Multiply,                                   "Multiply",                                     SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pmuludq,        INS_invalid,        INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_MultiplyHigh,                               "MultiplyHigh",                                 SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pmulhw,         INS_pmulhuw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_MultiplyAddAdjacent,                        "MultiplyAddAdjacent",                          SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pmaddwd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MultiplyLow,                                "MultiplyLow",                                  SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pmullw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2_MultiplyLow,                                "MultiplyLow",                                  SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pmullw,         INS_pmullw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_MultiplyScalar,                             "MultiplyScalar",                               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mulsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_Or,                                         "Or",                                           SSE2,         -1,              16,           2,     {INS_por,               INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_invalid,        INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_PackSignedSaturate,                         "PackSignedSaturate",                           SSE2,         -1,              16,           2,     {INS_packsswb,          INS_invalid,        INS_packssdw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -260,7 +260,7 @@ HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate,                        "MoveLowAndD
 //  SSSE3 Intrinsics
 HARDWARE_INTRINSIC(SSSE3_IsSupported,                               "get_IsSupported",                              SSSE3,        -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSSE3_Abs,                                       "Abs",                                          SSSE3,        -1,              16,           1,     {INS_invalid,           INS_pabsb,          INS_invalid,        INS_pabsw,          INS_invalid,        INS_pabsd,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSSE3_AlignRight,                                "AlignRight",                                   SSSE3,        -1,              16,           3,     {INS_palignr,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSSE3_AlignRight,                                "AlignRight",                                   SSSE3,        -1,              16,           3,     {INS_palignr,           INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSSE3_HorizontalAdd,                             "HorizontalAdd",                                SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_phaddw,         INS_invalid,        INS_phaddd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSSE3_HorizontalAddSaturate,                     "HorizontalAddSaturate",                        SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_phaddsw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSSE3_HorizontalSubtract,                        "HorizontalSubtract",                           SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_phsubw,         INS_invalid,        INS_phsubd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -277,7 +277,7 @@ HARDWARE_INTRINSIC(SSSE3_Sign,                                      "Sign",
 //  SSE41 Intrinsics
 HARDWARE_INTRINSIC(SSE41_IsSupported,                               "get_IsSupported",                              SSE41,        -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_Blend,                                     "Blend",                                        SSE41,        -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_pblendw,        INS_pblendw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blendps,        INS_blendpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE41_BlendVariable,                             "BlendVariable",                                SSE41,        -1,              16,           3,     {INS_pblendvb,          INS_pblendvb,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blendvps,       INS_blendvpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_BlendVariable,                             "BlendVariable",                                SSE41,        -1,              16,           3,     {INS_pblendvb,          INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_blendvps,       INS_blendvpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_Ceiling,                                   "Ceiling",                                      SSE41,        10,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_CeilingScalar,                             "CeilingScalar",                                SSE41,        10,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE41_CompareEqual,                              "CompareEqual",                                 SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pcmpeqq,        INS_pcmpeqq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
@@ -295,7 +295,7 @@ HARDWARE_INTRINSIC(SSE41_Min,                                       "Min",
 HARDWARE_INTRINSIC(SSE41_MinHorizontal,                             "MinHorizontal",                                SSE41,        -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_phminposuw,     INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_MultipleSumAbsoluteDifferences,            "MultipleSumAbsoluteDifferences",               SSE41,        -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_mpsadbw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE41_Multiply,                                  "Multiply",                                     SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pmuldq,         INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE41_MultiplyLow,                               "MultiplyLow",                                  SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pmulld,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_MultiplyLow,                               "MultiplyLow",                                  SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pmulld,         INS_pmulld,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_PackUnsignedSaturate,                      "PackUnsignedSaturate",                         SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_packusdw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_RoundCurrentDirection,                     "RoundCurrentDirection",                        SSE41,         4,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_RoundCurrentDirectionScalar,               "RoundCurrentDirectionScalar",                  SSE41,         4,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
@@ -409,12 +409,12 @@ HARDWARE_INTRINSIC(AVX2_IsSupported,                                "get_IsSuppo
 HARDWARE_INTRINSIC(AVX2_Abs,                                        "Abs",                                          AVX2,         -1,              32,           1,     {INS_pabsb,             INS_pabsb,          INS_pabsw,          INS_pabsw,          INS_pabsd,          INS_pabsd,          INS_paddq,          INS_paddq,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2_Add,                                        "Add",                                          AVX2,         -1,              32,           2,     {INS_paddb,             INS_paddb,          INS_paddw,          INS_paddw,          INS_paddd,          INS_paddd,          INS_paddq,          INS_paddq,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_AddSaturate,                                "AddSaturate",                                  AVX2,         -1,              32,           2,     {INS_paddsb,            INS_paddusb,        INS_paddsw,         INS_paddusw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_AlignRight,                                 "AlignRight",                                   AVX2,         -1,              32,           3,     {INS_palignr,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2_AlignRight,                                 "AlignRight",                                   AVX2,         -1,              32,           3,     {INS_palignr,           INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_And,                                        "And",                                          AVX2,         -1,              32,           2,     {INS_pand,              INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_AndNot,                                     "AndNot",                                       AVX2,         -1,              32,           2,     {INS_pandn,             INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_Average,                                    "Average",                                      AVX2,         -1,              32,           2,     {INS_invalid,           INS_pavgb,          INS_invalid,        INS_pavgw,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Blend,                                      "Blend",                                        AVX2,         -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_pblendw,        INS_pblendw,        INS_vpblendd,       INS_vpblendd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_UnfixedSIMDSize|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_BlendVariable,                              "BlendVariable",                                AVX2,         -1,              32,           3,     {INS_vpblendvb,         INS_vpblendvb,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_BlendVariable,                              "BlendVariable",                                AVX2,         -1,              32,           3,     {INS_vpblendvb,         INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector128,                 "BroadcastScalarToVector128",                   AVX2,         -1,              16,           1,     {INS_vpbroadcastb,      INS_vpbroadcastb,   INS_vpbroadcastw,   INS_vpbroadcastw,   INS_vpbroadcastd,   INS_vpbroadcastd,   INS_vpbroadcastq,   INS_vpbroadcastq,   INS_vbroadcastss,   INS_movddup},           HW_Category_SIMDScalar,             HW_Flag_OneTypeGeneric)
 HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector256,                 "BroadcastScalarToVector256",                   AVX2,         -1,              32,           1,     {INS_vpbroadcastb,      INS_vpbroadcastb,   INS_vpbroadcastw,   INS_vpbroadcastw,   INS_vpbroadcastd,   INS_vpbroadcastd,   INS_vpbroadcastq,   INS_vpbroadcastq,   INS_vbroadcastss,   INS_vbroadcastsd},      HW_Category_SIMDScalar,             HW_Flag_OneTypeGeneric)
 HARDWARE_INTRINSIC(AVX2_BroadcastVector128ToVector256,              "BroadcastVector128ToVector256",                AVX2,         -1,              32,           1,     {INS_vbroadcasti128,    INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoContainment)
@@ -446,16 +446,30 @@ HARDWARE_INTRINSIC(AVX2_Max,                                        "Max",
 HARDWARE_INTRINSIC(AVX2_Min,                                        "Min",                                          AVX2,         -1,              32,           2,     {INS_pminsb,            INS_pminub,         INS_pminsw,         INS_pminuw,         INS_pminsd,         INS_pminud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_MoveMask,                                   "MoveMask",                                     AVX2,         -1,              32,           1,     {INS_pmovmskb,          INS_pmovmskb,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2_Multiply,                                   "Multiply",                                     AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pmuldq,         INS_pmuludq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2_MultipleSumAbsoluteDifferences,             "MultipleSumAbsoluteDifferences",               AVX2,         -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_mpsadbw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2_MultiplyAddAdjacent,                        "MultiplyAddAdjacent",                          AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pmaddubsw,      INS_invalid,        INS_pmaddwd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_MultiplyHigh,                               "MultiplyHigh",                                 AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pmulhw,         INS_pmulhuw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2_MultiplyHighRoundScale,                     "MultiplyHighRoundScale",                       AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pmulhrsw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_MultiplyLow,                                "MultiplyLow",                                  AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pmullw,         INS_pmullw,         INS_pmulld,         INS_pmulld,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Or,                                         "Or",                                           AVX2,         -1,              32,           2,     {INS_por,               INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Permute2x128,                               "Permute2x128",                                 AVX2,         -1,              32,           3,     {INS_vperm2i128,        INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_Permute4x64,                                "Permute4x64",                                  AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vpermq,         INS_vpermq,         INS_invalid,        INS_vpermpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2_PermuteVar8x32,                             "PermuteVar8x32",                               AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpermd,         INS_vpermd,         INS_invalid,        INS_invalid,        INS_vpermps,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(AVX2_PackSignedSaturate,                         "PackSignedSaturate",                           AVX2,         -1,              32,           2,     {INS_packsswb,          INS_invalid,        INS_packssdw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_PackUnsignedSaturate,                       "PackUnsignedSaturate",                         AVX2,         -1,              32,           2,     {INS_invalid,           INS_packuswb,       INS_invalid,        INS_packusdw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical,                           "ShiftLeftLogical",                             AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_psllw,          INS_psllw,          INS_pslld,          INS_pslld,          INS_psllq,          INS_psllq,          INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical128BitLane,                 "ShiftLeftLogical128BitLane",                   AVX2,         -1,              32,           2,     {INS_pslldq,            INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_ShiftLeftLogicalVariable,                   "ShiftLeftLogicalVariable",                     AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpsllvd,        INS_vpsllvd,        INS_vpsllvq,        INS_vpsllvq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
 HARDWARE_INTRINSIC(AVX2_ShiftRightArithmetic,                       "ShiftRightArithmetic",                         AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_psraw,          INS_invalid,        INS_psrad,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2_ShiftRightArithmeticVariable,               "ShiftRightArithmeticVariable",                 AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpsravd,        INS_vpsravd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
 HARDWARE_INTRINSIC(AVX2_ShiftRightLogical,                          "ShiftRightLogical",                            AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_psrlw,          INS_psrlw,          INS_psrld,          INS_psrld,          INS_psrlq,          INS_psrlq,          INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_ShiftRightLogical128BitLane,                "ShiftRightLogical128BitLane",                  AVX2,         -1,              32,           2,     {INS_psrldq,            INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_ShiftRightLogicalVariable,                  "ShiftRightLogicalVariable",                    AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpsrlvd,        INS_vpsrlvd,        INS_vpsrlvq,        INS_vpsrlvq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2_Shuffle,                                    "Shuffle",                                      AVX2,         -1,              32,           2,     {INS_pshufb,            INS_pshufb,         INS_invalid,        INS_invalid,        INS_pshufd,         INS_pshufd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM)
+HARDWARE_INTRINSIC(AVX2_ShuffleHigh,                                "ShuffleHigh",                                  AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pshufhw,        INS_pshufhw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2_ShuffleLow,                                 "ShuffleLow",                                   AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pshuflw,        INS_pshuflw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2_Sign,                                       "Sign",                                         AVX2,         -1,              32,           2,     {INS_psignb,            INS_invalid,        INS_psignw,         INS_invalid,        INS_psignd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_SumAbsoluteDifferences,                     "SumAbsoluteDifferences",                       AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_psadbw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_Subtract,                                   "Subtract",                                     AVX2,         -1,              32,           2,     {INS_psubb,             INS_psubb,          INS_psubw,          INS_psubw,          INS_psubd,          INS_psubd,          INS_psubq,          INS_psubq,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_SubtractSaturate,                           "SubtractSaturate",                             AVX2,         -1,              32,           2,     {INS_psubsb,            INS_psubusb,        INS_psubsw,         INS_psubusw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_UnpackHigh,                                 "UnpackHigh",                                   AVX2,         -1,              32,           2,     {INS_punpckhbw,         INS_punpckhbw,      INS_punpckhwd,      INS_punpckhwd,      INS_punpckhdq,      INS_punpckhdq,      INS_punpckhqdq,     INS_punpckhqdq,     INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
index ea7abde..3948273 100644 (file)
@@ -1348,6 +1348,17 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_AVX2_PermuteVar8x32:
+        {
+            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+            // Pop index first (top of stack), then source; the node takes them in swapped order: (index, source)
+            GenTree* indexVector  = impSIMDPopStack(TYP_SIMD32);
+            GenTree* sourceVector = impSIMDPopStack(TYP_SIMD32);
+            retNode =
+                gtNewSimdHWIntrinsicNode(TYP_SIMD32, indexVector, sourceVector, NI_AVX2_PermuteVar8x32, baseType, 32);
+            break;
+        }
+
         case NI_AVX2_GatherMaskVector128:
         case NI_AVX2_GatherMaskVector256:
         {
index 51d5de1..84727f3 100644 (file)
@@ -500,6 +500,8 @@ INST3(vpermilpsvar,     "permilpsvar",      IUM_WR, BAD_CODE,     BAD_CODE,
 INST3(vpermilpdvar,     "permilpdvar",      IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x0D),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values
 INST3(vperm2f128,       "perm2f128",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x06),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Permute Floating-Point Values
 INST3(vpermpd,          "permpd",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE3A(0x01),                             INS_FLAGS_None)    // Permute Double-Precision Floating-Point Values
+INST3(vpermd,           "permd",            IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x36),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Permute Packed Doubleword Elements
+INST3(vpermps,          "permps",           IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x16),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Permute Single-Precision Floating-Point Elements
 INST3(vbroadcastf128,   "broadcastf128",    IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x1A),                             INS_FLAGS_None)    // Broadcast packed float values read from memory to entire ymm register
 INST3(vbroadcasti128,   "broadcasti128",    IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x5A),                             INS_FLAGS_None)    // Broadcast packed integer values read from memory to entire ymm register
 INST3(vmaskmovps,       "maskmovps",        IUM_WR, SSE38(0x2E),  BAD_CODE,     SSE38(0x2C),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
index b38fa7f..292483c 100644 (file)
@@ -2493,11 +2493,14 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
                 case NI_AVX_Permute2x128:
                 case NI_AVX2_Blend:
                 case NI_AVX2_InsertVector128:
+                case NI_AVX2_MultipleSumAbsoluteDifferences:
                 case NI_AVX2_Permute2x128:
                 case NI_AVX2_Permute4x64:
                 case NI_AVX2_ShiftLeftLogical:
                 case NI_AVX2_ShiftRightArithmetic:
                 case NI_AVX2_ShiftRightLogical:
+                case NI_AVX2_ShuffleHigh:
+                case NI_AVX2_ShuffleLow:
                 {
                     assert(supportsSIMDScalarLoads == false);
 
@@ -3089,6 +3092,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                         case NI_AVX_Permute2x128:
                         case NI_AVX_Shuffle:
                         case NI_AVX2_Blend:
+                        case NI_AVX2_MultipleSumAbsoluteDifferences:
                         case NI_AVX2_Permute2x128:
                         case NI_PCLMULQDQ_CarrylessMultiply:
                         {