Implement Shuffle* SSE2 hardware intrinsics
author Jacek Blaszczynski <biosciencenow@outlook.com>
Wed, 28 Feb 2018 05:08:55 +0000 (06:08 +0100)
committer Jacek Blaszczynski <biosciencenow@outlook.com>
Fri, 2 Mar 2018 22:48:49 +0000 (23:48 +0100)
src/jit/compiler.h
src/jit/emitxarch.cpp
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/instrsxarch.h
src/jit/lowerxarch.cpp
src/jit/lsraxarch.cpp

index 7bee3bf..20f71b7 100644 (file)
@@ -3107,7 +3107,7 @@ protected:
     bool isScalarISA(InstructionSet isa);
     static int ivalOfHWIntrinsic(NamedIntrinsic intrinsic);
     unsigned simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig);
-    static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic);
+    static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic, GenTreeHWIntrinsic* node = nullptr);
     static GenTree* lastOpOfHWIntrinsic(GenTreeHWIntrinsic* node, int numArgs);
     static instruction insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type);
     static HWIntrinsicCategory categoryOfHWIntrinsic(NamedIntrinsic intrinsic);
index 4ffe342..0bd85f2 100644 (file)
@@ -5535,15 +5535,28 @@ static bool isSseShift(instruction ins)
     }
 }
 
-static bool isSSEExtract(instruction ins)
+//------------------------------------------------------------------------
+// IsDstSrcImmAvxInstruction: check if instruction has the "RM R I"
+// (destination, source, immediate) format for all encodings: EVEX, VEX and legacy SSE
+//
+// Arguments:
+//    ins -- processor instruction to check
+//
+// Return Value:
+//    true if instruction has the "RM R I" format for all encodings
+//
+static bool IsDstSrcImmAvxInstruction(instruction ins)
 {
     switch (ins)
     {
+        case INS_extractps:
         case INS_pextrb:
         case INS_pextrw:
         case INS_pextrd:
         case INS_pextrq:
-        case INS_extractps:
+        case INS_pshufd:
+        case INS_pshufhw:
+        case INS_pshuflw:
             return true;
         default:
             return false;
@@ -5554,7 +5567,7 @@ void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg,
 {
     // TODO-XARCH refactoring emitIns_R_R_I to handle SSE2/AVX2 shift as well as emitIns_R_I
     bool isShift = isSseShift(ins);
-    if (isSSEExtract(ins) || (UseVEXEncoding() && !isShift))
+    if (IsDstSrcImmAvxInstruction(ins) || (UseVEXEncoding() && !isShift))
     {
         emitIns_R_R_I(ins, attr, reg, reg1, ival);
     }
index 2f0e5f8..c4d5546 100644 (file)
@@ -56,7 +56,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
     HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
     HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
     int                 ival        = Compiler::ivalOfHWIntrinsic(intrinsicID);
-    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID);
+    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID, node);
 
     assert((flags & HW_Flag_NoCodeGen) == 0);
 
index 5365c6e..c426071 100644 (file)
@@ -194,7 +194,7 @@ HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128,                        "LoadAligne
 HARDWARE_INTRINSIC(SSE2_LoadFence,                                   "LoadFence",                                        SSE2,       -1,            0,          0,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_LoadScalarVector128,                         "LoadScalarVector128",                              SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movd,      INS_movd,      INS_movq,      INS_movq,      INS_invalid,   INS_movsdsse2},         HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_LoadVector128,                               "LoadVector128",                                    SSE2,       -1,           16,          1,            {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_invalid,   INS_movupd},            HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_MaskMove,                                    "MaskMove",                                         SSE2,       -1,           16,          3,            {INS_maskmovdqu,INS_maskmovdqu,INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_MaskMove,                                    "MaskMove",                                         SSE2,       -1,           16,          3,            {INS_maskmovdqu,INS_maskmovdqu,INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Max,                                         "Max",                                              SSE2,       -1,           16,          2,            {INS_invalid,   INS_pmaxub,    INS_pmaxsw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_maxpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_MemoryFence,                                 "MemoryFence",                                      SSE2,       -1,            0,          0,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_MaxScalar,                                   "MaxScalar",                                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_maxsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
@@ -217,14 +217,17 @@ HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical128BitLane,                  "ShiftLeftL
 HARDWARE_INTRINSIC(SSE2_ShiftRightArithmetic,                        "ShiftRightArithmetic",                             SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_psraw,     INS_invalid,   INS_psrad,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_ShiftRightLogical,                           "ShiftRightLogical",                                SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_psrlw,     INS_psrlw,     INS_psrld,     INS_psrld,     INS_psrlq,     INS_psrlq,     INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_ShiftRightLogical128BitLane,                 "ShiftRightLogical128BitLane",                      SSE2,       -1,           16,          2,            {INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2_Shuffle,                                     "Shuffle",                                          SSE2,       -1,           16,         -1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pshufd,    INS_pshufd,    INS_invalid,   INS_invalid,   INS_invalid,   INS_shufpd},            HW_Category_IMM,                               HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2_ShuffleHigh,                                 "ShuffleHigh",                                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_pshufhw,   INS_pshufhw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2_ShuffleLow,                                  "ShuffleLow",                                       SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_pshuflw,   INS_pshuflw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_Sqrt,                                        "Sqrt",                                             SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_SqrtScalar,                                  "SqrtScalar",                                       SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtsd},            HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Store,                                       "Store",                                            SSE2,       -1,           16,          2,            {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_invalid,   INS_movupd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreAligned,                                "StoreAligned",                                     SSE2,       -1,           16,          2,            {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_invalid,   INS_movapd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal,                     "StoreAlignedNonTemporal",                          SSE2,       -1,           16,          2,            {INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_invalid,   INS_movntpd},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_StoreScalar,                                 "StoreScalar",                                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsdsse2},         HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_StoreLow,                                    "StoreLow",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movq,      INS_movq,      INS_invalid,   INS_movlpd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreHigh,                                   "StoreHigh",                                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhpd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreLow,                                    "StoreLow",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movq,      INS_movq,      INS_invalid,   INS_movlpd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreScalar,                                 "StoreScalar",                                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsdsse2},         HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Subtract,                                    "Subtract",                                         SSE2,       -1,           16,          2,            {INS_psubb,     INS_psubb,     INS_psubw,     INS_psubw,     INS_psubd,     INS_psubd,     INS_psubq,     INS_psubq,     INS_invalid,   INS_subpd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_SubtractSaturate,                            "SubtractSaturate",                                 SSE2,       -1,           16,          2,            {INS_psubsb,    INS_psubusb,   INS_psubsw,    INS_psubusw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_SubtractScalar,                              "SubtractScalar",                                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_subsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
@@ -320,7 +323,7 @@ HARDWARE_INTRINSIC(SSE42_CompareGreaterThan,                         "CompareGre
 //  AVX Intrinsics
 HARDWARE_INTRINSIC(AVX_IsSupported,                                  "get_IsSupported",                                  AVX,        -1,            0,           0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_Add,                                          "Add",                                              AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_addps,     INS_addpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_AddSubtract,                                  "AddSubtract",                                      AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_addsubps,  INS_addsubpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)                 
+HARDWARE_INTRINSIC(AVX_AddSubtract,                                  "AddSubtract",                                      AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_addsubps,  INS_addsubpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_And,                                          "And",                                              AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_andps,     INS_andpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX_AndNot,                                       "AndNot",                                           AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_andnps,    INS_andnpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_Blend,                                        "Blend",                                            AVX,        -1,           32,           3,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_blendps,   INS_blendpd},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
index 86a9489..ef64c4e 100644 (file)
@@ -219,19 +219,64 @@ unsigned Compiler::simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_I
 }
 
 //------------------------------------------------------------------------
-// numArgsOfHWIntrinsic: get the number of arguments
+// numArgsOfHWIntrinsic: get the number of arguments from the intrinsic table;
+// if the table value is -1, count the arguments on the GenTreeHWIntrinsic
+// node instead (the node must not be nullptr in that case)
 //
 // Arguments:
-//    intrinsic -- id of the intrinsic function.
+//    intrinsic -- id of the intrinsic function
+//    node      -- GenTreeHWIntrinsic* node (defaults to nullptr); must be non-null when the table numArgs is -1
 //
 // Return Value:
 //     number of arguments
 //
-int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic)
+int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic, GenTreeHWIntrinsic* node)
 {
     assert(intrinsic != NI_Illegal);
     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
-    return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs;
+
+    int numArgs = hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs;
+    if (numArgs >= 0)
+    {
+        return numArgs;
+    }
+
+    noway_assert(node != nullptr);
+    assert(numArgs == -1);
+
+    GenTree* op1 = node->gtGetOp1();
+    GenTree* op2 = node->gtGetOp2();
+
+    if (op2 != nullptr)
+    {
+        return 2;
+    }
+
+    if (op1 != nullptr)
+    {
+        if (op1->OperIsList())
+        {
+            numArgs              = 0;
+            GenTreeArgList* list = op1->AsArgList();
+
+            while (list != nullptr)
+            {
+                numArgs++;
+                list = list->Rest();
+            }
+
+            assert(numArgs > 0);
+            return numArgs;
+        }
+        else
+        {
+            return 1;
+        }
+    }
+    else
+    {
+        return 0;
+    }
 }
 
 //------------------------------------------------------------------------
index d448afc..71da647 100644 (file)
@@ -369,8 +369,10 @@ INST3( pcmpeqb,     "pcmpeqb"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,
 INST3( pcmpgtb,     "pcmpgtb"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      PCKDBL(0x64))   // Packed compare 8-bit signed integers for greater than
 
 INST3( pshufd,      "pshufd"      , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      PCKDBL(0x70))   // Packed shuffle of 32-bit integers
+INST3( pshufhw,     "pshufhw"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      SSEFLT(0x70))   // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
+INST3( pshuflw,     "pshuflw"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      SSEDBL(0x70))   // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.
 INST3( pextrw,      "pextrw"      , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      PCKDBL(0xC5))   // Extract 16-bit value into a r32 with zero extended to 32-bits
-INST3( pinsrw,      "pinsrw"      , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      PCKDBL(0xC4))   // packed insert word
+INST3( pinsrw,      "pinsrw"      , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      PCKDBL(0xC4))   // Insert word at index
 
 INST3( punpckhbw,   "punpckhbw"   , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      PCKDBL(0x68))   // Packed logical (unsigned) widen ubyte to ushort (hi)
 INST3( punpcklbw,   "punpcklbw"   , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,      PCKDBL(0x60))   // Packed logical (unsigned) widen ubyte to ushort (lo)
@@ -448,7 +450,7 @@ INST3( phsubsw,      "phsubsw"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SS
 INST3( lddqu,        "lddqu"       , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEDBL(0xF0))  // Load Unaligned integer
 INST3( movntdqa,     "movntdqa"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0x2A))   // Load Double Quadword Non-Temporal Aligned Hint
 INST3( movddup,      "movddup"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEDBL(0x12))  // Replicate Double FP Values
-INST3( movsldup,     "movsldup"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEFLT(0x12))  // Replicate even-indexed Single FP Values  
+INST3( movsldup,     "movsldup"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEFLT(0x12))  // Replicate even-indexed Single FP Values
 INST3( movshdup,     "movshdup"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEFLT(0x16))  // Replicate odd-indexed Single FP Values
 INST3( phminposuw,   "phminposuw"  , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0x41))   // Packed Horizontal Word Minimum
 INST3( mpsadbw,      "mpsadbw"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x42))   // Compute Multiple Packed Sums of Absolute Difference
index 3f1deb8..45799e3 100644 (file)
@@ -2367,7 +2367,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
     NamedIntrinsic      intrinsicID = node->gtHWIntrinsicId;
     HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
     HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
-    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID);
+    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID, node);
     GenTree*            op1         = node->gtGetOp1();
     GenTree*            op2         = node->gtGetOp2();
 
index fdb875a..296d558 100644 (file)
@@ -2259,7 +2259,7 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
     InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
     HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
     HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
-    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID);
+    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID, intrinsicTree);
 
     if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2)
     {
@@ -2321,11 +2321,9 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
 
             assert((flags & HW_Flag_NoCodeGen) == 0);
 
-            assert(numArgs != 0);
-            assert(numArgs != 1);
-
             if (info->srcCount >= 2)
             {
+                assert(numArgs >= 2);
                 LocationInfoListNode* op2Info = useList.Begin()->Next();
                 op2Info->info.isDelayFree     = true;
                 info->hasDelayFreeSrc         = true;