Implement AVX/AVX2 InsertVector128 and ExtractVector128 intrinsic
author Fei Peng <fei.peng@intel.com>
Fri, 16 Mar 2018 07:44:47 +0000 (00:44 -0700)
committer Tanner Gooding <tagoo@outlook.com>
Fri, 16 Mar 2018 13:50:46 +0000 (06:50 -0700)
src/jit/codegenlinear.h
src/jit/compiler.h
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/instrsxarch.h
src/jit/namedintrinsiclist.h

index c36e423..7ef7e95 100644 (file)
@@ -123,8 +123,7 @@ void genSSEIntrinsic(GenTreeHWIntrinsic* node);
 void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
 void genSSE41Intrinsic(GenTreeHWIntrinsic* node);
 void genSSE42Intrinsic(GenTreeHWIntrinsic* node);
-void genAVXIntrinsic(GenTreeHWIntrinsic* node);
-void genAVX2Intrinsic(GenTreeHWIntrinsic* node);
+void genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node);
 void genAESIntrinsic(GenTreeHWIntrinsic* node);
 void genBMI1Intrinsic(GenTreeHWIntrinsic* node);
 void genBMI2Intrinsic(GenTreeHWIntrinsic* node);
index 1eb0fe5..f1c9529 100644 (file)
@@ -3063,14 +3063,10 @@ protected:
                                CORINFO_METHOD_HANDLE method,
                                CORINFO_SIG_INFO*     sig,
                                bool                  mustExpand);
-    GenTree* impAVXIntrinsic(NamedIntrinsic        intrinsic,
-                             CORINFO_METHOD_HANDLE method,
-                             CORINFO_SIG_INFO*     sig,
-                             bool                  mustExpand);
-    GenTree* impAVX2Intrinsic(NamedIntrinsic        intrinsic,
-                              CORINFO_METHOD_HANDLE method,
-                              CORINFO_SIG_INFO*     sig,
-                              bool                  mustExpand);
+    GenTree* impAvxOrAvx2Intrinsic(NamedIntrinsic        intrinsic,
+                                   CORINFO_METHOD_HANDLE method,
+                                   CORINFO_SIG_INFO*     sig,
+                                   bool                  mustExpand);
     GenTree* impAESIntrinsic(NamedIntrinsic        intrinsic,
                              CORINFO_METHOD_HANDLE method,
                              CORINFO_SIG_INFO*     sig,
index 873f1c6..d19fe80 100644 (file)
@@ -252,10 +252,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             genSSE42Intrinsic(node);
             break;
         case InstructionSet_AVX:
-            genAVXIntrinsic(node);
-            break;
         case InstructionSet_AVX2:
-            genAVX2Intrinsic(node);
+            genAvxOrAvx2Intrinsic(node);
             break;
         case InstructionSet_AES:
             genAESIntrinsic(node);
@@ -1269,24 +1267,30 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
 }
 
 //------------------------------------------------------------------------
-// genAVXIntrinsic: Generates the code for an AVX hardware intrinsic node
+// genAvxOrAvx2Intrinsic: Generates the code for an AVX/AVX2 hardware intrinsic node
 //
 // Arguments:
 //    node - The hardware intrinsic node
 //
-void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
+void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
     var_types      baseType    = node->gtSIMDBaseType;
     emitAttr       attr        = EA_ATTR(node->gtSIMDSize);
     var_types      targetType  = node->TypeGet();
     instruction    ins         = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+    int            numArgs     = Compiler::numArgsOfHWIntrinsic(node);
     GenTree*       op1         = node->gtGetOp1();
     GenTree*       op2         = node->gtGetOp2();
+    regNumber      op1Reg      = REG_NA;
+    regNumber      op2Reg      = REG_NA;
     regNumber      targetReg   = node->gtRegNum;
     emitter*       emit        = getEmitter();
 
-    genConsumeOperands(node);
+    if ((op1 != nullptr) && !op1->OperIsList())
+    {
+        genConsumeOperands(node);
+    }
 
     switch (intrinsicID)
     {
@@ -1357,30 +1361,88 @@ void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
             break;
         }
 
-        default:
-            unreached();
-            break;
-    }
+        case NI_AVX_ExtractVector128:
+        case NI_AVX_InsertVector128:
+        case NI_AVX2_ExtractVector128:
+        case NI_AVX2_InsertVector128:
+        {
+            GenTree* lastOp = nullptr;
+            if (numArgs == 2)
+            {
+                assert(intrinsicID == NI_AVX_ExtractVector128 || intrinsicID == NI_AVX2_ExtractVector128);
+                op1Reg = op1->gtRegNum;
+                op2Reg = op2->gtRegNum;
+                lastOp = op2;
+            }
+            else
+            {
+                assert(numArgs == 3);
+                assert(op1->OperIsList());
+                assert(op1->gtGetOp2()->OperIsList());
+                assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());
 
-    genProduceReg(node);
-}
+                GenTreeArgList* argList = op1->AsArgList();
+                op1                     = argList->Current();
+                genConsumeRegs(op1);
+                op1Reg = op1->gtRegNum;
 
-//------------------------------------------------------------------------
-// genAVX2Intrinsic: Generates the code for an AVX2 hardware intrinsic node
-//
-// Arguments:
-//    node - The hardware intrinsic node
-//
-void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
-{
-    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
-    var_types      baseType    = node->gtSIMDBaseType;
-    instruction    ins         = INS_invalid;
+                argList = argList->Rest();
+                op2     = argList->Current();
+                genConsumeRegs(op2);
+                op2Reg = op2->gtRegNum;
 
-    genConsumeOperands(node);
+                argList = argList->Rest();
+                lastOp  = argList->Current();
+                genConsumeRegs(lastOp);
+            }
+
+            regNumber op3Reg = lastOp->gtRegNum;
+
+            auto emitSwCase = [&](unsigned i) {
+                // TODO-XARCH-Bug the emitter cannot work with imm8 >= 128,
+                // so clear the 8th bit that is not used by the instructions
+                i &= 0x7FU;
+                if (numArgs == 3)
+                {
+                    if (intrinsicID == NI_AVX_ExtractVector128 || intrinsicID == NI_AVX2_ExtractVector128)
+                    {
+                        emit->emitIns_R_AR_I(ins, attr, op2Reg, op1Reg, 0, (int)i);
+                    }
+                    else if (op2->TypeGet() == TYP_I_IMPL)
+                    {
+                        emit->emitIns_SIMD_R_R_AR_I(ins, attr, targetReg, op1Reg, op2Reg, (int)i);
+                    }
+                    else
+                    {
+                        assert(op2->TypeGet() == TYP_SIMD16);
+                        emit->emitIns_SIMD_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, (int)i);
+                    }
+                }
+                else
+                {
+                    assert(numArgs == 2);
+                    assert(intrinsicID == NI_AVX_ExtractVector128 || intrinsicID == NI_AVX2_ExtractVector128);
+                    emit->emitIns_SIMD_R_R_I(ins, attr, targetReg, op1Reg, (int)i);
+                }
+            };
+
+            if (lastOp->IsCnsIntOrI())
+            {
+                ssize_t ival = lastOp->AsIntCon()->IconValue();
+                emitSwCase((unsigned)ival);
+            }
+            else
+            {
+                // We emit a fallback case for the scenario when the imm-op is not a constant. This should
+                // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
+                // can also occur if the consumer calls it directly and just doesn't pass a constant value.
+                regNumber baseReg = node->ExtractTempReg();
+                regNumber offsReg = node->GetSingleTempReg();
+                genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
+            }
+            break;
+        }
 
-    switch (intrinsicID)
-    {
         default:
             unreached();
             break;
index c8c611f..7435c28 100644 (file)
@@ -348,10 +348,12 @@ HARDWARE_INTRINSIC(AVX_DotProduct,                                   "DotProduct
 HARDWARE_INTRINSIC(AVX_DuplicateEvenIndexed,                         "DuplicateEvenIndexed",                             AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsldup,  INS_movddup},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_DuplicateOddIndexed,                          "DuplicateOddIndexed",                              AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movshdup,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_ExtendToVector256,                            "ExtendToVector256",                                AVX,        -1,           32,           1,           {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movaps,    INS_movapd},            HW_Category_Helper,                            HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_ExtractVector128,                             "ExtractVector128",                                 AVX,        -1,           32,          -1,           {INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128, INS_vextractf128},HW_Category_IMM,                  HW_Flag_OneTypeGeneric|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX_Floor,                                        "Floor",                                            AVX,         9,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_GetLowerHalf,                                 "GetLowerHalf",                                     AVX,        -1,           32,           1,           {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movaps,    INS_movapd},            HW_Category_Helper,                            HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_HorizontalAdd,                                "HorizontalAdd",                                    AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_haddps,    INS_haddpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_HorizontalSubtract,                           "HorizontalSubtract",                               AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_hsubps,    INS_hsubpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX_InsertVector128,                              "InsertVector128",                                  AVX,        -1,           32,           3,           {INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128, INS_vinsertf128},HW_Category_IMM,                            HW_Flag_FullRangeIMM|HW_Flag_OneTypeGeneric|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX_LoadAlignedVector256,                         "LoadAlignedVector256",                             AVX,        -1,           32,           1,           {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movaps,    INS_movapd},            HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_LoadDquVector256,                             "LoadDquVector256",                                 AVX,        -1,           32,           1,           {INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_LoadVector256,                                "LoadVector256",                                    AVX,        -1,           32,           1,           {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movups,    INS_movupd},            HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
@@ -395,10 +397,12 @@ HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector128,                  "BroadcastS
 HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector256,                  "BroadcastScalarToVector256",                       AVX2,       -1,           32,           1,           {INS_vpbroadcastb,INS_vpbroadcastb,INS_vpbroadcastw,INS_vpbroadcastw,INS_vpbroadcastd,INS_vpbroadcastd,INS_vpbroadcastq,INS_vpbroadcastq,INS_vbroadcastss,INS_vbroadcastsd},   HW_Category_SimpleSIMD,         HW_Flag_OneTypeGeneric)
 HARDWARE_INTRINSIC(AVX2_CompareEqual,                                "CompareEqual",                                     AVX2,       -1,           32,           2,           {INS_pcmpeqb,   INS_pcmpeqb,   INS_pcmpeqw,   INS_pcmpeqw,   INS_pcmpeqd,   INS_pcmpeqd,   INS_pcmpeqq,   INS_pcmpeqq,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_CompareGreaterThan,                          "CompareGreaterThan",                               AVX2,       -1,           32,           2,           {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_pcmpgtq,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_ExtractVector128,                            "ExtractVector128",                                 AVX2,       -1,           32,          -1,           {INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_vextracti128,INS_invalid, INS_invalid},HW_Category_IMM,                            HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_HorizontalAdd,                               "HorizontalAdd",                                    AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_phaddw,    INS_invalid,   INS_phaddd,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate,                       "HorizontalAddSaturate",                            AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_phaddsw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_HorizontalSubtract,                          "HorizontalSubtract",                               AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_phsubw,    INS_invalid,   INS_phsubd,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_HorizontalSubtractSaturate,                  "HorizontalSubtractSaturate",                       AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_phsubsw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_InsertVector128,                             "InsertVector128",                                  AVX2,       -1,           32,           3,           {INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_vinserti128,INS_invalid, INS_invalid},     HW_Category_IMM,                               HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal,             "LoadAlignedVector256NonTemporal",                  AVX2,       -1,           32,           1,           {INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2_Multiply,                                    "Multiply",                                         AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pmuldq,    INS_pmuludq,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Or,                                          "Or",                                               AVX2,       -1,           32,           2,           {INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
index 56c7e99..c52958a 100644 (file)
@@ -660,7 +660,8 @@ bool Compiler::hwIntrinsicSignatureTypeSupported(var_types retType, CORINFO_SIG_
 static bool impIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
 {
     // HW_Flag_NoCodeGen implies this intrinsic should be manually morphed in the importer.
-    return category != HW_Category_Special && category != HW_Category_Scalar && (flags & HW_Flag_NoCodeGen) == 0;
+    return category != HW_Category_Special && category != HW_Category_Scalar &&
+           ((flags & (HW_Flag_NoCodeGen | HW_Flag_SpecialImport)) == 0);
 }
 
 //------------------------------------------------------------------------
@@ -769,7 +770,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
         assert(baseType != TYP_UNKNOWN);
     }
 
-    if ((flags & (HW_Flag_OneTypeGeneric | HW_Flag_TwoTypeGeneric)) != 0)
+    if (((flags & (HW_Flag_OneTypeGeneric | HW_Flag_TwoTypeGeneric)) != 0) && ((flags & HW_Flag_SpecialImport) == 0))
     {
         if (!varTypeIsArithmetic(baseType))
         {
@@ -869,9 +870,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
         case InstructionSet_SSE42:
             return impSSE42Intrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_AVX:
-            return impAVXIntrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_AVX2:
-            return impAVX2Intrinsic(intrinsic, method, sig, mustExpand);
+            return impAvxOrAvx2Intrinsic(intrinsic, method, sig, mustExpand);
 
         case InstructionSet_AES:
             return impAESIntrinsic(intrinsic, method, sig, mustExpand);
@@ -1058,35 +1058,48 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic        intrinsic,
     return retNode;
 }
 
-GenTree* Compiler::impAVXIntrinsic(NamedIntrinsic        intrinsic,
-                                   CORINFO_METHOD_HANDLE method,
-                                   CORINFO_SIG_INFO*     sig,
-                                   bool                  mustExpand)
+GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic        intrinsic,
+                                         CORINFO_METHOD_HANDLE method,
+                                         CORINFO_SIG_INFO*     sig,
+                                         bool                  mustExpand)
 {
     GenTree*  retNode  = nullptr;
     GenTree*  op1      = nullptr;
     GenTree*  op2      = nullptr;
     var_types baseType = TYP_UNKNOWN;
-    switch (intrinsic)
-    {
-        default:
-            JITDUMP("Not implemented hardware intrinsic");
-            break;
-    }
-    return retNode;
-}
+    int       simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
 
-GenTree* Compiler::impAVX2Intrinsic(NamedIntrinsic        intrinsic,
-                                    CORINFO_METHOD_HANDLE method,
-                                    CORINFO_SIG_INFO*     sig,
-                                    bool                  mustExpand)
-{
-    GenTree*  retNode  = nullptr;
-    GenTree*  op1      = nullptr;
-    GenTree*  op2      = nullptr;
-    var_types baseType = TYP_UNKNOWN;
     switch (intrinsic)
     {
+        case NI_AVX_ExtractVector128:
+        case NI_AVX2_ExtractVector128:
+        {
+            GenTree* lastOp = impPopStack().val;
+            assert(lastOp->IsCnsIntOrI() || mustExpand);
+            GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
+            if (sig->numArgs == 2)
+            {
+                baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+                if (!varTypeIsArithmetic(baseType))
+                {
+                    retNode = impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
+                }
+                else
+                {
+                    retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, lastOp, intrinsic, baseType, 32);
+                }
+            }
+            else
+            {
+                assert(sig->numArgs == 3);
+                op1                                    = impPopStack().val;
+                CORINFO_ARG_LIST_HANDLE secondArg      = info.compCompHnd->getArgNext(sig->args);
+                CORINFO_CLASS_HANDLE    secondArgClass = info.compCompHnd->getArgClass(sig, secondArg);
+                baseType                               = getBaseTypeOfSIMDType(secondArgClass);
+                retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, vectorOp, lastOp, intrinsic, baseType, 32);
+            }
+            break;
+        }
         default:
             JITDUMP("Not implemented hardware intrinsic");
             break;
index c0cd91d..3be838b 100644 (file)
@@ -473,8 +473,8 @@ INST3( vpbroadcastb, "pbroadcastb" , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SS
 INST3( vpbroadcastw, "pbroadcastw" , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0x79))   // Broadcast int16 value from reg/memory to entire ymm register
 INST3( vpbroadcastd, "pbroadcastd" , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0x58))   // Broadcast int32 value from reg/memory to entire ymm register
 INST3( vpbroadcastq, "pbroadcastq" , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0x59))   // Broadcast int64 value from reg/memory to entire ymm register
-INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19),  BAD_CODE, BAD_CODE)      // Extract 128-bit packed floating point values
-INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39),  BAD_CODE, BAD_CODE)      // Extract 128-bit packed integer values
+INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19),  BAD_CODE, SSE3A(0x19))   // Extract 128-bit packed floating point values
+INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39),  BAD_CODE, SSE3A(0x39))   // Extract 128-bit packed integer values
 INST3( vinsertf128,  "insertf128"  , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x18))   // Insert 128-bit packed floating point values
 INST3( vinserti128,  "inserti128"  , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x38))   // Insert 128-bit packed integer values
 INST3( vzeroupper,   "zeroupper"   , 0, IUM_WR, 0, 0, 0xC577F8,     BAD_CODE, BAD_CODE)      // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
index f4d7cb2..dd98821 100644 (file)
@@ -97,14 +97,19 @@ enum HWIntrinsicFlag : unsigned int
     // Select base type using the second argument type
     HW_Flag_BaseTypeFromSecondArg = 0x10000,
 
-    // Specail codegen
+    // Special codegen
     // the intrinsics need special rules in CodeGen,
-    // but can be table-driven in the front-end
+    // but may be table-driven in the front-end
     HW_Flag_SpecialCodeGen = 0x20000,
 
     // No Read/Modify/Write Semantics
     // the intrinsic doesn't have read/modify/write semantics in two/three-operand form.
     HW_Flag_NoRMWSemantics = 0x40000,
+
+    // Special import
+    // the intrinsics need special rules in importer,
+    // but may be table-driven in the back-end
+    HW_Flag_SpecialImport = 0x80000,
 };
 
 inline HWIntrinsicFlag operator|(HWIntrinsicFlag c1, HWIntrinsicFlag c2)