Update 64-bit intrinsics and simplify SSE/SSE2 intrinsic
authorFei Peng <fei.peng@intel.com>
Wed, 28 Feb 2018 07:15:29 +0000 (23:15 -0800)
committerFei Peng <fei.peng@intel.com>
Thu, 1 Mar 2018 19:29:39 +0000 (11:29 -0800)
src/jit/compiler.h
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/namedintrinsiclist.h

index 7b9313a..7bee3bf 100644 (file)
@@ -3059,14 +3059,6 @@ protected:
                               CORINFO_METHOD_HANDLE method,
                               CORINFO_SIG_INFO*     sig,
                               bool                  mustExpand);
-    GenTree* impSSE3Intrinsic(NamedIntrinsic        intrinsic,
-                              CORINFO_METHOD_HANDLE method,
-                              CORINFO_SIG_INFO*     sig,
-                              bool                  mustExpand);
-    GenTree* impSSSE3Intrinsic(NamedIntrinsic        intrinsic,
-                               CORINFO_METHOD_HANDLE method,
-                               CORINFO_SIG_INFO*     sig,
-                               bool                  mustExpand);
     GenTree* impSSE41Intrinsic(NamedIntrinsic        intrinsic,
                                CORINFO_METHOD_HANDLE method,
                                CORINFO_SIG_INFO*     sig,
@@ -3125,6 +3117,7 @@ protected:
     GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType);
     static bool isImmHWIntrinsic(NamedIntrinsic intrinsic, GenTree* lastOp);
     GenTree* addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand);
+    bool hwIntrinsicSignatureTypeSupported(var_types retType, CORINFO_SIG_INFO* sig, HWIntrinsicFlag flags);
 #endif // _TARGET_XARCH_
 #ifdef _TARGET_ARM64_
     InstructionSet lookupHWIntrinsicISA(const char* className);
index 7fda952..b38c2bf 100644 (file)
@@ -36,10 +36,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 static bool genIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
 {
     // TODO - make more categories to the table-driven framework
-    // HW_Category_Helper and HW_Flag_MultiIns usually need manual codegen
+    // HW_Category_Helper and HW_Flag_MultiIns/HW_Flag_SpecialCodeGen usually need manual codegen
     const bool tableDrivenCategory =
         category != HW_Category_Special && category != HW_Category_Scalar && category != HW_Category_Helper;
-    const bool tableDrivenFlag = (flags & HW_Flag_MultiIns) == 0;
+    const bool tableDrivenFlag = (flags & (HW_Flag_MultiIns | HW_Flag_SpecialCodeGen)) == 0;
     return tableDrivenCategory && tableDrivenFlag;
 }
 
@@ -593,17 +593,6 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
 
     switch (intrinsicID)
     {
-        case NI_SSE_ConvertScalarToVector128Single:
-        {
-            assert(node->TypeGet() == TYP_SIMD16);
-            assert(node->gtSIMDBaseType == TYP_FLOAT);
-            assert(Compiler::ivalOfHWIntrinsic(intrinsicID) == -1);
-
-            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
-            genHWIntrinsic_R_R_RM(node, ins);
-            break;
-        }
-
         case NI_SSE_CompareEqualOrderedScalar:
         case NI_SSE_CompareEqualUnorderedScalar:
         {
@@ -973,19 +962,14 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             assert(op2 == nullptr);
             assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
                    baseType == TYP_LONG || baseType == TYP_ULONG);
-            if (op1Reg != targetReg)
+            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+            if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
             {
-                instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
-                if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
-                {
-                    emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
-                }
-                else
-                {
-                    // TODO-XArch-Bug https://github.com/dotnet/coreclr/issues/16329
-                    // using hardcoded instruction as workaround for inexact type conversions
-                    emit->emitIns_R_R(INS_mov_xmm2i, emitActualTypeSize(baseType), op1Reg, targetReg);
-                }
+                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
+            }
+            else
+            {
+                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
             }
             break;
         }
index e14bbd1..e01a00c 100644 (file)
@@ -33,29 +33,29 @@ HARDWARE_INTRINSIC(SSE_AddScalar,                                    "AddScalar"
 HARDWARE_INTRINSIC(SSE_And,                                          "And",                                              SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_andps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE_AndNot,                                       "AndNot",                                           SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_andnps,    INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_CompareEqual,                                 "CompareEqual",                                     SSE,         0,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar,                    "CompareEqualOrderedScalar",                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar,                    "CompareEqualOrderedScalar",                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareEqualScalar,                           "CompareEqualScalar",                               SSE,         0,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar,                  "CompareEqualUnorderedScalar",                      SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar,                  "CompareEqualUnorderedScalar",                      SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareGreaterThan,                           "CompareGreaterThan",                               SSE,         6,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar,              "CompareGreaterThanOrderedScalar",                  SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar,              "CompareGreaterThanOrderedScalar",                  SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar,                     "CompareGreaterThanScalar",                         SSE,         6,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar,            "CompareGreaterThanUnorderedScalar",                SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar,            "CompareGreaterThanUnorderedScalar",                SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual,                    "CompareGreaterThanOrEqual",                        SSE,         5,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar,       "CompareGreaterThanOrEqualOrderedScalar",           SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar,       "CompareGreaterThanOrEqualOrderedScalar",           SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar,              "CompareGreaterThanOrEqualScalar",                  SSE,         5,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar,     "CompareGreaterThanOrEqualUnorderedScalar",         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar,     "CompareGreaterThanOrEqualUnorderedScalar",         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareLessThan,                              "CompareLessThan",                                  SSE,         1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar,                 "CompareLessThanOrderedScalar",                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar,                 "CompareLessThanOrderedScalar",                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareLessThanScalar,                        "CompareLessThanScalar",                            SSE,         1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar,               "CompareLessThanUnorderedScalar",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar,               "CompareLessThanUnorderedScalar",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual,                       "CompareLessThanOrEqual",                           SSE,         2,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar,          "CompareLessThanOrEqualOrderedScalar",              SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar,          "CompareLessThanOrEqualOrderedScalar",              SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar,                 "CompareLessThanOrEqualScalar",                     SSE,         2,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar,        "CompareLessThanOrEqualUnorderedScalar",            SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar,        "CompareLessThanOrEqualUnorderedScalar",            SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareNotEqual,                              "CompareNotEqual",                                  SSE,         4,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar,                 "CompareNotEqualOrderedScalar",                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar,                 "CompareNotEqualOrderedScalar",                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar,                        "CompareNotEqualScalar",                            SSE,         4,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar,               "CompareNotEqualUnorderedScalar",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar,               "CompareNotEqualUnorderedScalar",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan,                        "CompareNotGreaterThan",                            SSE,         2,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar,                  "CompareNotGreaterThanScalar",                      SSE,         2,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual,                 "CompareNotGreaterThanOrEqual",                     SSE,         1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
@@ -68,12 +68,12 @@ HARDWARE_INTRINSIC(SSE_CompareOrdered,                               "CompareOrd
 HARDWARE_INTRINSIC(SSE_CompareOrderedScalar,                         "CompareOrderedScalar",                             SSE,         7,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_CompareUnordered,                             "CompareUnordered",                                 SSE,         3,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar,                       "CompareUnorderedScalar",                           SSE,         3,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32,                               "ConvertToInt32",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtss2si,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64,                               "ConvertToInt64",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtss2si,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToSingle,                              "ConvertToSingle",                                  SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_Helper,                            HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single,               "ConvertScalarToVector128Single",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsi2ss,  INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation,                 "ConvertToInt32WithTruncation",                     SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttss2si, INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation,                 "ConvertToInt64WithTruncation",                     SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttss2si, INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32,                               "ConvertToInt32",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtss2si,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64,                               "ConvertToInt64",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtss2si,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToSingle,                              "ConvertToSingle",                                  SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_Helper,                            HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single,               "ConvertScalarToVector128Single",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsi2ss,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_SecondArgMaybe64Bit|HW_Flag_BaseTypeFromFirstArg|HW_Flag_CopyUpperBits|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation,                 "ConvertToInt32WithTruncation",                     SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttss2si, INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation,                 "ConvertToInt64WithTruncation",                     SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttss2si, INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_Divide,                                       "Divide",                                           SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_DivideScalar,                                 "DivideScalar",                                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_LoadAlignedVector128,                         "LoadAlignedVector128",                             SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movaps,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
@@ -134,29 +134,29 @@ HARDWARE_INTRINSIC(SSE2_And,                                         "And",
 HARDWARE_INTRINSIC(SSE2_AndNot,                                      "AndNot",                                           SSE2,       -1,           16,          2,            {INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_invalid,   INS_andnpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_Average,                                     "Average",                                          SSE2,       -1,           16,          2,            {INS_invalid,   INS_pavgb,     INS_invalid,   INS_pavgw,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_CompareEqual,                                "CompareEqual",                                     SSE2,        0,           16,          2,            {INS_pcmpeqb,   INS_pcmpeqb,   INS_pcmpeqw,   INS_pcmpeqw,   INS_pcmpeqd,   INS_pcmpeqd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar,                   "CompareEqualOrderedScalar",                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar,                   "CompareEqualOrderedScalar",                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareEqualScalar,                          "CompareEqualScalar",                               SSE2,        0,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar,                 "CompareEqualUnorderedScalar",                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar,                 "CompareEqualUnorderedScalar",                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareGreaterThan,                          "CompareGreaterThan",                               SSE2,        6,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar,             "CompareGreaterThanOrderedScalar",                  SSE2,       -1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar,             "CompareGreaterThanOrderedScalar",                  SSE2,       -1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantic
 HARDWARE_INTRINSIC(SSE2_CompareGreaterThanScalar,                    "CompareGreaterThanScalar",                         SSE2,        6,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar,           "CompareGreaterThanUnorderedScalar",                SSE2,       -1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar,           "CompareGreaterThanUnorderedScalar",                SSE2,       -1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqual,                   "CompareGreaterThanOrEqual",                        SSE2,        5,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar,      "CompareGreaterThanOrEqualOrderedScalar",           SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar,      "CompareGreaterThanOrEqualOrderedScalar",           SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualScalar,             "CompareGreaterThanOrEqualScalar",                  SSE2,        5,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar,    "CompareGreaterThanOrEqualUnorderedScalar",         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar,    "CompareGreaterThanOrEqualUnorderedScalar",         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareLessThan,                             "CompareLessThan",                                  SSE2,        1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar,                "CompareLessThanOrderedScalar",                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanScalar,                       "CompareLessThanScalar",                            SSE2,        1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromArg|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar,              "CompareLessThanUnorderedScalar",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar,                "CompareLessThanOrderedScalar",                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanScalar,                       "CompareLessThanScalar",                            SSE2,        1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar,              "CompareLessThanUnorderedScalar",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqual,                      "CompareLessThanOrEqual",                           SSE2,        2,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar,         "CompareLessThanOrEqualOrderedScalar",              SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar,         "CompareLessThanOrEqualOrderedScalar",              SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualScalar,                "CompareLessThanOrEqualScalar",                     SSE2,        2,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar,       "CompareLessThanOrEqualUnorderedScalar",            SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar,       "CompareLessThanOrEqualUnorderedScalar",            SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareNotEqual,                             "CompareNotEqual",                                  SSE2,        4,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar,                "CompareNotEqualOrderedScalar",                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar,                "CompareNotEqualOrderedScalar",                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareNotEqualScalar,                       "CompareNotEqualScalar",                            SSE2,        4,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar,              "CompareNotEqualUnorderedScalar",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar,              "CompareNotEqualUnorderedScalar",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThan,                       "CompareNotGreaterThan",                            SSE2,        2,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanScalar,                 "CompareNotGreaterThanScalar",                      SSE2,        2,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanOrEqual,                "CompareNotGreaterThanOrEqual",                     SSE2,        1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
@@ -169,23 +169,23 @@ HARDWARE_INTRINSIC(SSE2_CompareOrdered,                              "CompareOrd
 HARDWARE_INTRINSIC(SSE2_CompareOrderedScalar,                        "CompareOrderedScalar",                             SSE2,        7,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_CompareUnordered,                            "CompareUnordered",                                 SSE2,        3,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_CompareUnorderedScalar,                      "CompareUnorderedScalar",                           SSE2,        3,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_ConvertToDouble,                             "ConvertToDouble",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsd},             HW_Category_Helper,                            HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt32,                              "ConvertToInt32",                                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsd2si},          HW_Category_Special,                           HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt32WithTruncation,                "ConvertToInt32WithTruncation",                     SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttsd2si},         HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt64,                              "ConvertToInt64",                                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_cvtsd2si},          HW_Category_Special,                           HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt64WithTruncation,                "ConvertToInt64WithTruncation",                     SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttsd2si},         HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToUInt32,                             "ConvertToUInt32",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToUInt64,                             "ConvertToUInt64",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Double,                    "ConvertToVector128Double",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2pd,  INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtps2pd,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double,              "ConvertScalarToVector128Double",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsi2sd,  INS_invalid,   INS_cvtsi2sd,  INS_invalid,   INS_cvtss2sd,  INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32,                     "ConvertToVector128Int32",                          SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtps2dq,  INS_cvtpd2dq},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int32,               "ConvertScalarToVector128Int32",                    SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation,       "ConvertToVector128Int32WithTruncation",            SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttps2dq, INS_cvttpd2dq},         HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int64,               "ConvertScalarToVector128Int64",                    SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single,                    "ConvertToVector128Single",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2ps,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtpd2ps},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single,              "ConvertScalarToVector128Single",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsd2ss},          HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToDouble,                             "ConvertToDouble",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsd},             HW_Category_Helper,                            HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt32,                              "ConvertToInt32",                                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsd2si},          HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt32WithTruncation,                "ConvertToInt32WithTruncation",                     SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttsd2si},         HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt64,                              "ConvertToInt64",                                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_cvtsd2si},          HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt64WithTruncation,                "ConvertToInt64WithTruncation",                     SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttsd2si},         HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToUInt32,                             "ConvertToUInt32",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToUInt64,                             "ConvertToUInt64",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Double,                    "ConvertToVector128Double",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2pd,  INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtps2pd,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double,              "ConvertScalarToVector128Double",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsi2sd,  INS_invalid,   INS_cvtsi2sd,  INS_invalid,   INS_cvtss2sd,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_SecondArgMaybe64Bit|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32,                     "ConvertToVector128Int32",                          SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtps2dq,  INS_cvtpd2dq},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int32,               "ConvertScalarToVector128Int32",                    SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantic|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation,       "ConvertToVector128Int32WithTruncation",            SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttps2dq, INS_cvttpd2dq},         HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantic
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int64,               "ConvertScalarToVector128Int64",                    SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_64BitOnly|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single,                    "ConvertToVector128Single",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2ps,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtpd2ps},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single,              "ConvertScalarToVector128Single",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsd2ss,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt32,              "ConvertScalarToVector128UInt32",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt64,              "ConvertScalarToVector128UInt64",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt64,              "ConvertScalarToVector128UInt64",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Divide,                                      "Divide",                                           SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divpd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_DivideScalar,                                "DivideScalar",                                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128,                        "LoadAlignedVector128",                             SSE2,       -1,           16,          1,            {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_invalid,   INS_movapd},            HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
@@ -199,16 +199,16 @@ HARDWARE_INTRINSIC(SSE2_Min,                                         "Min",
 HARDWARE_INTRINSIC(SSE2_MinScalar,                                   "MinScalar",                                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_minsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_MoveMask,                                    "MoveMask",                                         SSE2,       -1,           16,          1,            {INS_pmovmskb,  INS_pmovmskb,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movmskpd},          HW_Category_Special,                           HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_MoveScalar,                                  "MoveScalar",                                       SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsdsse2},         HW_Category_SIMDScalar,                        HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE2_Multiply,                                    "Multiply",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pmuludq,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mulpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative|HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE2_Multiply,                                    "Multiply",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pmuludq,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mulpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(SSE2_MultiplyHigh,                                "MultiplyHigh",                                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_pmulhw,    INS_pmulhuw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MultiplyHorizontalAdd,                       "MultiplyHorizontalAdd",                            SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_pmaddwd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative|HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE2_MultiplyHorizontalAdd,                       "MultiplyHorizontalAdd",                            SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_pmaddwd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(SSE2_MultiplyLow,                                 "MultiplyLow",                                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_pmullw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_MultiplyScalar,                              "MultiplyScalar",                                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mulsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_Or,                                          "Or",                                               SSE2,       -1,           16,          2,            {INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_invalid,   INS_orpd},              HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_PackSignedSaturate,                          "PackSignedSaturate",                               SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_packsswb,  INS_invalid,   INS_packssdw,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_PackUnsignedSaturate,                        "PackUnsignedSaturate",                             SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_packuswb,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE2_PackSignedSaturate,                          "PackSignedSaturate",                               SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_packsswb,  INS_invalid,   INS_packssdw,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE2_PackUnsignedSaturate,                        "PackUnsignedSaturate",                             SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_packuswb,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(SSE2_SetZeroVector128,                            "SetZeroVector128",                                 SSE2,       -1,           16,          0,            {INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_invalid,   INS_xorpd},             HW_Category_Helper,                            HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_SumAbsoluteDifferences,                      "SumAbsoluteDifferences",                           SSE2,       -1,           16,          2,            {INS_invalid,   INS_psadbw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE2_SumAbsoluteDifferences,                      "SumAbsoluteDifferences",                           SSE2,       -1,           16,          2,            {INS_invalid,   INS_psadbw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical,                            "ShiftLeftLogical",                                 SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_psllw,     INS_psllw,     INS_pslld,     INS_pslld,     INS_psllq,     INS_psllq,     INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical128BitLane,                  "ShiftLeftLogical128BitLane",                       SSE2,       -1,           16,          2,            {INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_ShiftRightArithmetic,                        "ShiftRightArithmetic",                             SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_psraw,     INS_invalid,   INS_psrad,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
@@ -296,12 +296,12 @@ HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinity,                    "RoundToPos
 HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinityScalar,              "RoundToPositiveInfinityScalar",                    SSE41,      10,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE41_RoundToZero,                                "RoundToZero",                                      SSE41,      11,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_RoundToZeroScalar,                          "RoundToZeroScalar",                                SSE41,      11,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_TestAllOnes,                                "TestAllOnes",                                      SSE41,      -1,           16,           1,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_TestAllZeros,                               "TestAllZeros",                                     SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE41_TestC,                                      "TestC",                                            SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros,                           "TestMixOnesZeros",                                 SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC,                            "TestNotZAndNotC",                                  SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE41_TestZ,                                      "TestZ",                                            SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE41_TestAllOnes,                                "TestAllOnes",                                      SSE41,      -1,           16,           1,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestAllZeros,                               "TestAllZeros",                                     SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestC,                                      "TestC",                                            SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros,                           "TestMixOnesZeros",                                 SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC,                            "TestNotZAndNotC",                                  SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestZ,                                      "TestZ",                                            SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
 
 //  SSE42 Intrinsics
 HARDWARE_INTRINSIC(SSE42_IsSupported,                                "get_IsSupported",                                  SSE42,      -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
index 36f48ba..d34345e 100644 (file)
@@ -551,10 +551,34 @@ bool Compiler::compSupportsHWIntrinsic(InstructionSet isa)
                                                     isFullyImplmentedISAClass(isa));
 }
 
-static bool isTypeSupportedForIntrinsic(var_types type)
+//------------------------------------------------------------------------
+// hwIntrinsicSignatureTypeSupported: platform support of hardware intrinsics
+//
+// Arguments:
+//    retType - return type
+//    sig     - intrinsic signature
+//    flags   - flags of the intrinsics
+//
+// Return Value:
+//    On 32-bit platforms, return true if the intrinsic does not use any 64-bit registers (r64)
+bool Compiler::hwIntrinsicSignatureTypeSupported(var_types retType, CORINFO_SIG_INFO* sig, HWIntrinsicFlag flags)
 {
 #ifdef _TARGET_X86_
-    return !varTypeIsLong(type);
+    CORINFO_CLASS_HANDLE argClass;
+
+    if (((flags & HW_Flag_64BitOnly) != 0))
+    {
+        return false;
+    }
+    else if ((flags & HW_Flag_SecondArgMaybe64Bit) != 0)
+    {
+        assert(sig->numArgs >= 2);
+        CorInfoType corType =
+            strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(sig->args), &argClass));
+        return !varTypeIsLong(JITtype2varType(corType));
+    }
+
+    return !varTypeIsLong(retType);
 #else
     return true;
 #endif
@@ -609,9 +633,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
     // This intrinsic is supported if
     // - the ISA is available on the underlying hardware (compSupports returns true)
     // - the compiler supports this hardware intrinsics (compSupportsHWIntrinsic returns true)
-    // - intrinsics do not require 64-bit registers (r64) on 32-bit platforms (isTypeSupportedForIntrinsic returns
+    // - intrinsics do not require 64-bit registers (r64) on 32-bit platforms (signatureTypeSupproted returns
     // true)
-    bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa) && isTypeSupportedForIntrinsic(retType);
+    bool issupported =
+        compSupports(isa) && compSupportsHWIntrinsic(isa) && hwIntrinsicSignatureTypeSupported(retType, sig, flags);
 
     if (category == HW_Category_IsSupportedProperty)
     {
@@ -656,17 +681,25 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
 
     bool isTableDriven = impIsTableDrivenHWIntrinsic(category, flags);
 
-    if (isTableDriven && (!varTypeIsSIMD(retType) || ((flags & HW_Flag_BaseTypeFromArg) != 0)))
+    if (isTableDriven && (category == HW_Category_MemoryStore ||
+                          ((flags & (HW_Flag_BaseTypeFromFirstArg | HW_Flag_BaseTypeFromSecondArg)) != 0)))
     {
-        if (retType != TYP_VOID)
+        if ((flags & HW_Flag_BaseTypeFromFirstArg) != 0)
         {
             baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
         }
         else
         {
-            assert(category == HW_Category_MemoryStore);
-            baseType =
-                getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, info.compCompHnd->getArgNext(sig->args)));
+            assert(category == HW_Category_MemoryStore || ((flags & HW_Flag_BaseTypeFromSecondArg) != 0));
+            CORINFO_ARG_LIST_HANDLE secondArg      = info.compCompHnd->getArgNext(sig->args);
+            CORINFO_CLASS_HANDLE    secondArgClass = info.compCompHnd->getArgClass(sig, secondArg);
+            baseType                               = getBaseTypeOfSIMDType(secondArgClass);
+
+            if (baseType == TYP_UNKNOWN) // the second argument is not a vector
+            {
+                baseType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, secondArg, &secondArgClass)));
+                assert(baseType != TYP_STRUCT);
+            }
         }
 
         assert(baseType != TYP_UNKNOWN);
@@ -769,10 +802,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
             return impSSEIntrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_SSE2:
             return impSSE2Intrinsic(intrinsic, method, sig, mustExpand);
-        case InstructionSet_SSE3:
-            return impSSE3Intrinsic(intrinsic, method, sig, mustExpand);
-        case InstructionSet_SSSE3:
-            return impSSSE3Intrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_SSE41:
             return impSSE41Intrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_SSE42:
@@ -837,30 +866,6 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
-        case NI_SSE_ConvertScalarToVector128Single:
-        {
-            assert(sig->numArgs == 2);
-            assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
-
-#ifdef _TARGET_X86_
-            CORINFO_CLASS_HANDLE argClass;
-
-            CORINFO_ARG_LIST_HANDLE argLst = info.compCompHnd->getArgNext(sig->args);
-            CorInfoType             corType =
-                strip(info.compCompHnd->getArgType(sig, argLst, &argClass)); // type of the second argument
-
-            if (varTypeIsLong(JITtype2varType(corType)))
-            {
-                return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
-            }
-#endif // _TARGET_X86_
-
-            op2     = impPopStack().val;
-            op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, simdSize);
-            break;
-        }
-
         case NI_SSE_ReciprocalScalar:
         case NI_SSE_ReciprocalSqrtScalar:
         case NI_SSE_SqrtScalar:
@@ -959,88 +964,6 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
-        case NI_SSE2_ConvertScalarToVector128Double:
-        {
-            assert(sig->numArgs == 2);
-            assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_DOUBLE);
-
-            argList = info.compCompHnd->getArgNext(sig->args);
-            CorInfoType corType =
-                strip(info.compCompHnd->getArgType(sig, argList, &argClass)); // type of the second argument
-
-            baseType = JITtype2varType(corType);
-
-#ifdef _TARGET_X86_
-            if (varTypeIsLong(JITtype2varType(corType)))
-            {
-                return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
-            }
-#endif // _TARGET_X86_
-
-            if (baseType == TYP_STRUCT)
-            {
-                baseType = TYP_FLOAT; // it is the only type passed as Vector
-                op2      = impSIMDPopStack(TYP_SIMD16);
-            }
-            else
-            {
-                op2 = impPopStack().val;
-            }
-
-            op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, baseType, simdSize);
-
-            break;
-        }
-
-        case NI_SSE2_ConvertScalarToVector128Int64:
-        case NI_SSE2_ConvertScalarToVector128UInt64:
-        {
-            assert(sig->numArgs == 1);
-            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
-            assert(baseType == TYP_LONG || baseType == TYP_ULONG);
-
-#ifdef _TARGET_X86_
-            return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
-#endif // _TARGET_X86_
-
-            op1     = impPopStack().val;
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, baseType, simdSize);
-            break;
-        }
-
-        case NI_SSE2_ConvertScalarToVector128Single:
-        {
-            assert(sig->numArgs == 2);
-            assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
-
-            op2     = impSIMDPopStack(TYP_SIMD16);
-            op1     = impSIMDPopStack(TYP_SIMD16);
-            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_DOUBLE, simdSize);
-            break;
-        }
-
-        case NI_SSE2_ConvertToInt32:
-        case NI_SSE2_ConvertToInt64:
-        {
-            assert(sig->numArgs == 1);
-            op1      = impSIMDPopStack(TYP_SIMD16);
-            retType  = JITtype2varType(sig->retType);
-            baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
-            retNode  = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
-            break;
-        }
-
-        case NI_SSE2_ConvertToUInt32:
-        case NI_SSE2_ConvertToUInt64:
-        {
-            assert(sig->numArgs == 1);
-            op1      = impSIMDPopStack(TYP_SIMD16);
-            baseType = JITtype2varType(sig->retType);
-            retNode  = gtNewSimdHWIntrinsicNode(baseType, op1, intrinsic, baseType, simdSize);
-            break;
-        }
-
         case NI_SSE2_LoadFence:
         case NI_SSE2_MemoryFence:
         {
@@ -1070,22 +993,6 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
     return retNode;
 }
 
-GenTree* Compiler::impSSE3Intrinsic(NamedIntrinsic        intrinsic,
-                                    CORINFO_METHOD_HANDLE method,
-                                    CORINFO_SIG_INFO*     sig,
-                                    bool                  mustExpand)
-{
-    return nullptr;
-}
-
-GenTree* Compiler::impSSSE3Intrinsic(NamedIntrinsic        intrinsic,
-                                     CORINFO_METHOD_HANDLE method,
-                                     CORINFO_SIG_INFO*     sig,
-                                     bool                  mustExpand)
-{
-    return nullptr;
-}
-
 GenTree* Compiler::impSSE41Intrinsic(NamedIntrinsic        intrinsic,
                                      CORINFO_METHOD_HANDLE method,
                                      CORINFO_SIG_INFO*     sig,
index 7ad997e..96c6b86 100644 (file)
@@ -75,8 +75,8 @@ enum HWIntrinsicFlag : unsigned int
     // some SIMD scalar intrinsics need the semantics of copying upper bits from the source operand
     HW_Flag_CopyUpperBits = 0x200,
 
-    // Select base type using argument type
-    HW_Flag_BaseTypeFromArg = 0x400,
+    // Select base type using the first argument type
+    HW_Flag_BaseTypeFromFirstArg = 0x400,
 
     // Indicates compFloatingPointUsed does not need to be set.
     HW_Flag_NoFloatingPointUsed = 0x800,
@@ -89,9 +89,22 @@ enum HWIntrinsicFlag : unsigned int
     // the imm intrinsic does not need jumptable fallback when it gets non-const argument
     HW_Flag_NoJmpTableIMM = 0x2000,
 
+    // 64-bit intrinsics
+    // Intrinsics that operate over 64-bit general purpose registers are not supported on 32-bit platform
+    HW_Flag_64BitOnly           = 0x4000,
+    HW_Flag_SecondArgMaybe64Bit = 0x8000,
+
+    // Select base type using the second argument type
+    HW_Flag_BaseTypeFromSecondArg = 0x10000,
+
+    // Specail codegen
+    // the intrinsics need special rules in CodeGen,
+    // but can be table-driven in the front-end
+    HW_Flag_SpecialCodeGen = 0x20000,
+
     // No Read/Modify/Write Semantics
     // the intrinsic does not have read/modify/write semantics and doesn't need
-    HW_Flag_NoRMWSemantics = 0x4000,
+    HW_Flag_NoRMWSemantics = 0x40000,
 };
 
 inline HWIntrinsicFlag operator|(HWIntrinsicFlag c1, HWIntrinsicFlag c2)