Adding some asserts that we won't overwrite one of the hwintrinsic operand registers.
authorTanner Gooding <tagoo@outlook.com>
Tue, 27 Feb 2018 15:45:35 +0000 (07:45 -0800)
committerTanner Gooding <tagoo@outlook.com>
Wed, 28 Feb 2018 15:19:27 +0000 (07:19 -0800)
src/jit/emitxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/lsraxarch.cpp
src/jit/namedintrinsiclist.h

index 2753c2d..64a7635 100644 (file)
@@ -5490,6 +5490,9 @@ void emitter::emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg,
     {
         if (reg1 != reg)
         {
+            // Ensure we aren't overwriting op2
+            assert(reg2 != reg);
+
             emitIns_R_R(INS_movaps, attr, reg, reg1);
         }
         emitIns_R_R(ins, attr, reg, reg2);
@@ -5569,10 +5572,18 @@ void emitter::emitIns_SIMD_R_R_R_R(
         // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
         if (reg3 != REG_XMM0)
         {
+            // Ensure we aren't overwriting op1 or op2
+            assert(reg1 != REG_XMM0);
+            assert(reg2 != REG_XMM0);
+
             emitIns_R_R(INS_movaps, attr, REG_XMM0, reg3);
         }
         if (reg1 != reg)
         {
+            // Ensure we aren't overwriting op2 or op3
+            assert(reg2 != reg);
+            assert((reg3 == REG_XMM0) || (reg != REG_XMM0));
+
             emitIns_R_R(INS_movaps, attr, reg, reg1);
         }
         emitIns_R_R(ins, attr, reg, reg2);
@@ -5657,6 +5668,9 @@ void emitter::emitIns_SIMD_R_R_R_I(
     {
         if (reg1 != reg)
         {
+            // Ensure we aren't overwriting op2
+            assert(reg2 != reg);
+
             emitIns_R_R(INS_movaps, attr, reg, reg1);
         }
         emitIns_R_R_I(ins, attr, reg, reg2, ival);
index be9d031..cc4710a 100644 (file)
@@ -33,29 +33,29 @@ HARDWARE_INTRINSIC(SSE_AddScalar,                                    "AddScalar"
 HARDWARE_INTRINSIC(SSE_And,                                          "And",                                              SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_andps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE_AndNot,                                       "AndNot",                                           SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_andnps,    INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_CompareEqual,                                 "CompareEqual",                                     SSE,         0,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar,                    "CompareEqualOrderedScalar",                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar,                    "CompareEqualOrderedScalar",                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareEqualScalar,                           "CompareEqualScalar",                               SSE,         0,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar,                  "CompareEqualUnorderedScalar",                      SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar,                  "CompareEqualUnorderedScalar",                      SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareGreaterThan,                           "CompareGreaterThan",                               SSE,         6,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar,              "CompareGreaterThanOrderedScalar",                  SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar,              "CompareGreaterThanOrderedScalar",                  SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar,                     "CompareGreaterThanScalar",                         SSE,         6,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar,            "CompareGreaterThanUnorderedScalar",                SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar,            "CompareGreaterThanUnorderedScalar",                SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual,                    "CompareGreaterThanOrEqual",                        SSE,         5,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar,       "CompareGreaterThanOrEqualOrderedScalar",           SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar,       "CompareGreaterThanOrEqualOrderedScalar",           SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar,              "CompareGreaterThanOrEqualScalar",                  SSE,         5,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar,     "CompareGreaterThanOrEqualUnorderedScalar",         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar,     "CompareGreaterThanOrEqualUnorderedScalar",         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareLessThan,                              "CompareLessThan",                                  SSE,         1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar,                 "CompareLessThanOrderedScalar",                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar,                 "CompareLessThanOrderedScalar",                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareLessThanScalar,                        "CompareLessThanScalar",                            SSE,         1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar,               "CompareLessThanUnorderedScalar",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar,               "CompareLessThanUnorderedScalar",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual,                       "CompareLessThanOrEqual",                           SSE,         2,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar,          "CompareLessThanOrEqualOrderedScalar",              SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar,          "CompareLessThanOrEqualOrderedScalar",              SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar,                 "CompareLessThanOrEqualScalar",                     SSE,         2,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar,        "CompareLessThanOrEqualUnorderedScalar",            SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar,        "CompareLessThanOrEqualUnorderedScalar",            SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareNotEqual,                              "CompareNotEqual",                                  SSE,         4,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar,                 "CompareNotEqualOrderedScalar",                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar,                 "CompareNotEqualOrderedScalar",                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comiss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar,                        "CompareNotEqualScalar",                            SSE,         4,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar,               "CompareNotEqualUnorderedScalar",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar,               "CompareNotEqualUnorderedScalar",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomiss,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan,                        "CompareNotGreaterThan",                            SSE,         2,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar,                  "CompareNotGreaterThanScalar",                      SSE,         2,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual,                 "CompareNotGreaterThanOrEqual",                     SSE,         1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
@@ -68,53 +68,53 @@ HARDWARE_INTRINSIC(SSE_CompareOrdered,                               "CompareOrd
 HARDWARE_INTRINSIC(SSE_CompareOrderedScalar,                         "CompareOrderedScalar",                             SSE,         7,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_CompareUnordered,                             "CompareUnordered",                                 SSE,         3,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar,                       "CompareUnorderedScalar",                           SSE,         3,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32,                               "ConvertToInt32",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtss2si,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64,                               "ConvertToInt64",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtss2si,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToSingle,                              "ConvertToSingle",                                  SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_Helper,                            HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single,               "ConvertScalarToVector128Single",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsi2ss,  INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation,                 "ConvertToInt32WithTruncation",                     SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttss2si, INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation,                 "ConvertToInt64WithTruncation",                     SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttss2si, INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32,                               "ConvertToInt32",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtss2si,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64,                               "ConvertToInt64",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtss2si,  INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToSingle,                              "ConvertToSingle",                                  SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_Helper,                            HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single,               "ConvertScalarToVector128Single",                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsi2ss,  INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation,                 "ConvertToInt32WithTruncation",                     SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttss2si, INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation,                 "ConvertToInt64WithTruncation",                     SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttss2si, INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_Divide,                                       "Divide",                                           SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_DivideScalar,                                 "DivideScalar",                                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_LoadAlignedVector128,                         "LoadAlignedVector128",                             SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movaps,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadHigh,                                     "LoadHigh",                                         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhps,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadLow,                                      "LoadLow",                                          SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movlps,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadScalarVector128,                          "LoadScalarVector128",                              SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadVector128,                                "LoadVector128",                                    SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movups,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadAlignedVector128,                         "LoadAlignedVector128",                             SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movaps,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_LoadHigh,                                     "LoadHigh",                                         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhps,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_LoadLow,                                      "LoadLow",                                          SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movlps,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_LoadScalarVector128,                          "LoadScalarVector128",                              SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_LoadVector128,                                "LoadVector128",                                    SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movups,    INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_Max,                                          "Max",                                              SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_maxps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE_MaxScalar,                                    "MaxScalar",                                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_maxss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_Min,                                          "Min",                                              SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_minps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE_MinScalar,                                    "MinScalar",                                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_minss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_MoveHighToLow,                                "MoveHighToLow",                                    SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhlps,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(SSE_MoveLowToHigh,                                "MoveLowToHigh",                                    SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movlhps,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_MoveMask,                                     "MoveMask",                                         SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movmskps,  INS_invalid},           HW_Category_Special,                           HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_MoveMask,                                     "MoveMask",                                         SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movmskps,  INS_invalid},           HW_Category_Special,                           HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_MoveScalar,                                   "MoveScalar",                                       SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(SSE_Multiply,                                     "Multiply",                                         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mulps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE_MultiplyScalar,                               "MultiplyScalar",                                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mulss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_Or,                                           "Or",                                               SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_orps,      INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_Prefetch0,                                    "Prefetch0",                                        SSE,        -1,            0,           1,           {INS_invalid,   INS_prefetcht0,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Prefetch1,                                    "Prefetch1",                                        SSE,        -1,            0,           1,           {INS_invalid,   INS_prefetcht1,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Prefetch2,                                    "Prefetch2",                                        SSE,        -1,            0,           1,           {INS_invalid,   INS_prefetcht2,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_PrefetchNonTemporal,                          "PrefetchNonTemporal",                              SSE,        -1,            0,           1,           {INS_invalid,   INS_prefetchnta, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Reciprocal,                                   "Reciprocal",                                       SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rcpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Prefetch0,                                    "Prefetch0",                                        SSE,        -1,            0,           1,           {INS_invalid,   INS_prefetcht0,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_Prefetch1,                                    "Prefetch1",                                        SSE,        -1,            0,           1,           {INS_invalid,   INS_prefetcht1,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_Prefetch2,                                    "Prefetch2",                                        SSE,        -1,            0,           1,           {INS_invalid,   INS_prefetcht2,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_PrefetchNonTemporal,                          "PrefetchNonTemporal",                              SSE,        -1,            0,           1,           {INS_invalid,   INS_prefetchnta, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_Reciprocal,                                   "Reciprocal",                                       SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rcpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_ReciprocalScalar,                             "ReciprocalScalar",                                 SSE,        -1,           16,          -1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rcpss,     INS_invalid},           HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ReciprocalSqrt,                               "ReciprocalSqrt",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rsqrtps,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ReciprocalSqrt,                               "ReciprocalSqrt",                                   SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rsqrtps,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_ReciprocalSqrtScalar,                         "ReciprocalSqrtScalar",                             SSE,        -1,           16,          -1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rsqrtss,   INS_invalid},           HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_SetAllVector128,                              "SetAllVector128",                                  SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Helper,                            HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(SSE_SetScalarVector128,                           "SetScalarVector128",                               SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_Helper,                            HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(SSE_SetAllVector128,                              "SetAllVector128",                                  SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Helper,                            HW_Flag_NoCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_SetScalarVector128,                           "SetScalarVector128",                               SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_Helper,                            HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_SetVector128,                                 "SetVector128",                                     SSE,        -1,           16,           4,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Helper,                            HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(SSE_SetZeroVector128,                             "SetZeroVector128",                                 SSE,        -1,           16,           0,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_xorps,     INS_invalid},           HW_Category_Helper,                            HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_SetZeroVector128,                             "SetZeroVector128",                                 SSE,        -1,           16,           0,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_xorps,     INS_invalid},           HW_Category_Helper,                            HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_Shuffle,                                      "Shuffle",                                          SSE,        -1,           16,           3,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_shufps,    INS_invalid},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE_Sqrt,                                         "Sqrt",                                             SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtps,    INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Sqrt,                                         "Sqrt",                                             SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtps,    INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_SqrtScalar,                                   "SqrtScalar",                                       SSE,        -1,           16,          -1,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtss,    INS_invalid},           HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_StaticCast,                                   "StaticCast",                                       SSE,        -1,           16,           1,           {INS_movaps,    INS_movaps,      INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps},            HW_Category_Helper,                            HW_Flag_TwoTypeGeneric)
-HARDWARE_INTRINSIC(SSE_Store,                                        "Store",                                            SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movups,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreAligned,                                 "StoreAligned",                                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movaps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreAlignedNonTemporal,                      "StoreAlignedNonTemporal",                          SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movntps,   INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreFence,                                   "StoreFence",                                       SSE,        -1,            0,           0,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreHigh,                                    "StoreHigh",                                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreLow,                                     "StoreLow",                                         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movlps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StoreScalar,                                  "StoreScalar",                                      SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StaticCast,                                   "StaticCast",                                       SSE,        -1,           16,           1,           {INS_movaps,    INS_movaps,      INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps},            HW_Category_Helper,                            HW_Flag_TwoTypeGeneric|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_Store,                                        "Store",                                            SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movups,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_StoreAligned,                                 "StoreAligned",                                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movaps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_StoreAlignedNonTemporal,                      "StoreAlignedNonTemporal",                          SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movntps,   INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_StoreFence,                                   "StoreFence",                                       SSE,        -1,            0,           0,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_StoreHigh,                                    "StoreHigh",                                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_StoreLow,                                     "StoreLow",                                         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movlps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_StoreScalar,                                  "StoreScalar",                                      SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_Subtract,                                     "Subtract",                                         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_subps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_SubtractScalar,                               "SubtractScalar",                                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_subss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_UnpackHigh,                                   "UnpackHigh",                                       SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_unpckhps,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
@@ -134,29 +134,29 @@ HARDWARE_INTRINSIC(SSE2_And,                                         "And",
 HARDWARE_INTRINSIC(SSE2_AndNot,                                      "AndNot",                                           SSE2,       -1,           16,          2,            {INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_pandn,     INS_invalid,   INS_andnpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_Average,                                     "Average",                                          SSE2,       -1,           16,          2,            {INS_invalid,   INS_pavgb,     INS_invalid,   INS_pavgw,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_CompareEqual,                                "CompareEqual",                                     SSE2,        0,           16,          2,            {INS_pcmpeqb,   INS_pcmpeqb,   INS_pcmpeqw,   INS_pcmpeqw,   INS_pcmpeqd,   INS_pcmpeqd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar,                   "CompareEqualOrderedScalar",                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar,                   "CompareEqualOrderedScalar",                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareEqualScalar,                          "CompareEqualScalar",                               SSE2,        0,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar,                 "CompareEqualUnorderedScalar",                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar,                 "CompareEqualUnorderedScalar",                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareGreaterThan,                          "CompareGreaterThan",                               SSE2,        6,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar,             "CompareGreaterThanOrderedScalar",                  SSE2,       -1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar,             "CompareGreaterThanOrderedScalar",                  SSE2,       -1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareGreaterThanScalar,                    "CompareGreaterThanScalar",                         SSE2,        6,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar,           "CompareGreaterThanUnorderedScalar",                SSE2,       -1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar,           "CompareGreaterThanUnorderedScalar",                SSE2,       -1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqual,                   "CompareGreaterThanOrEqual",                        SSE2,        5,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar,      "CompareGreaterThanOrEqualOrderedScalar",           SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar,      "CompareGreaterThanOrEqualOrderedScalar",           SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualScalar,             "CompareGreaterThanOrEqualScalar",                  SSE2,        5,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar,    "CompareGreaterThanOrEqualUnorderedScalar",         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar,    "CompareGreaterThanOrEqualUnorderedScalar",         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareLessThan,                             "CompareLessThan",                                  SSE2,        1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar,                "CompareLessThanOrderedScalar",                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar,                "CompareLessThanOrderedScalar",                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareLessThanScalar,                       "CompareLessThanScalar",                            SSE2,        1,           16,          2,            {INS_pcmpgtb,   INS_invalid,   INS_pcmpgtw,   INS_invalid,   INS_pcmpgtd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromArg|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar,              "CompareLessThanUnorderedScalar",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar,              "CompareLessThanUnorderedScalar",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqual,                      "CompareLessThanOrEqual",                           SSE2,        2,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar,         "CompareLessThanOrEqualOrderedScalar",              SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar,         "CompareLessThanOrEqualOrderedScalar",              SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualScalar,                "CompareLessThanOrEqualScalar",                     SSE2,        2,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar,       "CompareLessThanOrEqualUnorderedScalar",            SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar,       "CompareLessThanOrEqualUnorderedScalar",            SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareNotEqual,                             "CompareNotEqual",                                  SSE2,        4,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar,                "CompareNotEqualOrderedScalar",                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar,                "CompareNotEqualOrderedScalar",                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_comisd},            HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareNotEqualScalar,                       "CompareNotEqualScalar",                            SSE2,        4,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar,              "CompareNotEqualUnorderedScalar",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar,              "CompareNotEqualUnorderedScalar",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_ucomisd},           HW_Category_SIMDScalar,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThan,                       "CompareNotGreaterThan",                            SSE2,        2,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanScalar,                 "CompareNotGreaterThanScalar",                      SSE2,        2,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanOrEqual,                "CompareNotGreaterThanOrEqual",                     SSE2,        1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
@@ -169,35 +169,35 @@ HARDWARE_INTRINSIC(SSE2_CompareOrdered,                              "CompareOrd
 HARDWARE_INTRINSIC(SSE2_CompareOrderedScalar,                        "CompareOrderedScalar",                             SSE2,        7,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_CompareUnordered,                            "CompareUnordered",                                 SSE2,        3,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmppd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_CompareUnorderedScalar,                      "CompareUnorderedScalar",                           SSE2,        3,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_ConvertToDouble,                             "ConvertToDouble",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsd},             HW_Category_Helper,                            HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt32,                              "ConvertToInt32",                                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsd2si},          HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt32WithTruncation,                "ConvertToInt32WithTruncation",                     SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttsd2si},         HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt64,                              "ConvertToInt64",                                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_cvtsd2si},          HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt64WithTruncation,                "ConvertToInt64WithTruncation",                     SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttsd2si},         HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_ConvertToUInt32,                             "ConvertToUInt32",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertToUInt64,                             "ConvertToUInt64",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Double,                    "ConvertToVector128Double",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2pd,  INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtps2pd,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double,              "ConvertScalarToVector128Double",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsi2sd,  INS_invalid,   INS_cvtsi2sd,  INS_invalid,   INS_cvtss2sd,  INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32,                     "ConvertToVector128Int32",                          SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtps2dq,  INS_cvtpd2dq},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int32,               "ConvertScalarToVector128Int32",                    SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation,       "ConvertToVector128Int32WithTruncation",            SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttps2dq, INS_cvttpd2dq},         HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int64,               "ConvertScalarToVector128Int64",                    SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single,                    "ConvertToVector128Single",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2ps,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtpd2ps},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single,              "ConvertScalarToVector128Single",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsd2ss},          HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt32,              "ConvertScalarToVector128UInt32",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt64,              "ConvertScalarToVector128UInt64",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_ConvertToDouble,                             "ConvertToDouble",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsd},             HW_Category_Helper,                            HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt32,                              "ConvertToInt32",                                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsd2si},          HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt32WithTruncation,                "ConvertToInt32WithTruncation",                     SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttsd2si},         HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt64,                              "ConvertToInt64",                                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_cvtsd2si},          HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToInt64WithTruncation,                "ConvertToInt64WithTruncation",                     SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttsd2si},         HW_Category_SIMDScalar,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToUInt32,                             "ConvertToUInt32",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToUInt64,                             "ConvertToUInt64",                                  SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_xmm2i, INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Double,                    "ConvertToVector128Double",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2pd,  INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtps2pd,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double,              "ConvertScalarToVector128Double",                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsi2sd,  INS_invalid,   INS_cvtsi2sd,  INS_invalid,   INS_cvtss2sd,  INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32,                     "ConvertToVector128Int32",                          SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtps2dq,  INS_cvtpd2dq},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int32,               "ConvertScalarToVector128Int32",                    SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation,       "ConvertToVector128Int32WithTruncation",            SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvttps2dq, INS_cvttpd2dq},         HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int64,               "ConvertScalarToVector128Int64",                    SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single,                    "ConvertToVector128Single",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2ps,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtpd2ps},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single,              "ConvertScalarToVector128Single",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtsd2ss},          HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt32,              "ConvertScalarToVector128UInt32",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt64,              "ConvertScalarToVector128UInt64",                   SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mov_i2xmm, INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Divide,                                      "Divide",                                           SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divpd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_DivideScalar,                                "DivideScalar",                                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128,                        "LoadAlignedVector128",                             SSE2,       -1,           16,          1,            {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_invalid,   INS_movapd},            HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_LoadFence,                                   "LoadFence",                                        SSE2,       -1,            0,          0,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_LoadScalarVector128,                         "LoadScalarVector128",                              SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movd,      INS_movd,      INS_movq,      INS_movq,      INS_invalid,   INS_movsdsse2},         HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_LoadVector128,                               "LoadVector128",                                    SSE2,       -1,           16,          1,            {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_invalid,   INS_movupd},            HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128,                        "LoadAlignedVector128",                             SSE2,       -1,           16,          1,            {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_invalid,   INS_movapd},            HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_LoadFence,                                   "LoadFence",                                        SSE2,       -1,            0,          0,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_LoadScalarVector128,                         "LoadScalarVector128",                              SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movd,      INS_movd,      INS_movq,      INS_movq,      INS_invalid,   INS_movsdsse2},         HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_LoadVector128,                               "LoadVector128",                                    SSE2,       -1,           16,          1,            {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_invalid,   INS_movupd},            HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Max,                                         "Max",                                              SSE2,       -1,           16,          2,            {INS_invalid,   INS_pmaxub,    INS_pmaxsw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_maxpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MemoryFence,                                 "MemoryFence",                                      SSE2,       -1,            0,          0,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_MemoryFence,                                 "MemoryFence",                                      SSE2,       -1,            0,          0,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_MaxScalar,                                   "MaxScalar",                                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_maxsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_Min,                                         "Min",                                              SSE2,       -1,           16,          2,            {INS_invalid,   INS_pminub,    INS_pminsw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_minpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_MinScalar,                                   "MinScalar",                                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_minsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_MoveMask,                                    "MoveMask",                                         SSE2,       -1,           16,          1,            {INS_pmovmskb,  INS_pmovmskb,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movmskpd},          HW_Category_Special,                           HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_MoveMask,                                    "MoveMask",                                         SSE2,       -1,           16,          1,            {INS_pmovmskb,  INS_pmovmskb,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movmskpd},          HW_Category_Special,                           HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_MoveScalar,                                  "MoveScalar",                                       SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsdsse2},         HW_Category_SIMDScalar,                        HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(SSE2_Multiply,                                    "Multiply",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pmuludq,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mulpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative|HW_Flag_BaseTypeFromArg)
 HARDWARE_INTRINSIC(SSE2_MultiplyHigh,                                "MultiplyHigh",                                     SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_pmulhw,    INS_pmulhuw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
@@ -207,21 +207,21 @@ HARDWARE_INTRINSIC(SSE2_MultiplyScalar,                              "MultiplySc
 HARDWARE_INTRINSIC(SSE2_Or,                                          "Or",                                               SSE2,       -1,           16,          2,            {INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_invalid,   INS_orpd},              HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_PackSignedSaturate,                          "PackSignedSaturate",                               SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_packsswb,  INS_invalid,   INS_packssdw,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
 HARDWARE_INTRINSIC(SSE2_PackUnsignedSaturate,                        "PackUnsignedSaturate",                             SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_packuswb,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE2_SetZeroVector128,                            "SetZeroVector128",                                 SSE2,       -1,           16,          0,            {INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_invalid,   INS_xorpd},             HW_Category_Helper,                            HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_SetZeroVector128,                            "SetZeroVector128",                                 SSE2,       -1,           16,          0,            {INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_invalid,   INS_xorpd},             HW_Category_Helper,                            HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_SumAbsoluteDifferences,                      "SumAbsoluteDifferences",                           SSE2,       -1,           16,          2,            {INS_invalid,   INS_psadbw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
 HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical,                            "ShiftLeftLogical",                                 SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_psllw,     INS_psllw,     INS_pslld,     INS_pslld,     INS_psllq,     INS_psllq,     INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical128BitLane,                  "ShiftLeftLogical128BitLane",                       SSE2,       -1,           16,          2,            {INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_pslldq,    INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_ShiftRightArithmetic,                        "ShiftRightArithmetic",                             SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_psraw,     INS_invalid,   INS_psrad,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_ShiftRightLogical,                           "ShiftRightLogical",                                SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_psrlw,     INS_psrlw,     INS_psrld,     INS_psrld,     INS_psrlq,     INS_psrlq,     INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE2_ShiftRightLogical128BitLane,                 "ShiftRightLogical128BitLane",                      SSE2,       -1,           16,          2,            {INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_psrldq,    INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_Sqrt,                                        "Sqrt",                                             SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_SqrtScalar,                                  "SqrtScalar",                                       SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtsd},            HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_Store,                                       "Store",                                            SSE2,       -1,           16,          2,            {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_invalid,   INS_movupd},            HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_StoreAligned,                                "StoreAligned",                                     SSE2,       -1,           16,          2,            {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_invalid,   INS_movapd},            HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal,                     "StoreAlignedNonTemporal",                          SSE2,       -1,           16,          2,            {INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_invalid,   INS_movntpd},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_StoreScalar,                                 "StoreScalar",                                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsdsse2},         HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_StoreLow,                                    "StoreLow",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movq,      INS_movq,      INS_invalid,   INS_movlpd},            HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_StoreHigh,                                   "StoreHigh",                                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhpd},            HW_Category_MemoryStore,                       HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_Sqrt,                                        "Sqrt",                                             SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_SqrtScalar,                                  "SqrtScalar",                                       SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtsd},            HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_Store,                                       "Store",                                            SSE2,       -1,           16,          2,            {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_invalid,   INS_movupd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreAligned,                                "StoreAligned",                                     SSE2,       -1,           16,          2,            {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_invalid,   INS_movapd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal,                     "StoreAlignedNonTemporal",                          SSE2,       -1,           16,          2,            {INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_invalid,   INS_movntpd},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreScalar,                                 "StoreScalar",                                      SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsdsse2},         HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreLow,                                    "StoreLow",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movq,      INS_movq,      INS_invalid,   INS_movlpd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_StoreHigh,                                   "StoreHigh",                                        SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhpd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Subtract,                                    "Subtract",                                         SSE2,       -1,           16,          2,            {INS_psubb,     INS_psubb,     INS_psubw,     INS_psubw,     INS_psubd,     INS_psubd,     INS_psubq,     INS_psubq,     INS_invalid,   INS_subpd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_SubtractSaturate,                            "SubtractSaturate",                                 SSE2,       -1,           16,          2,            {INS_psubsb,    INS_psubusb,   INS_psubsw,    INS_psubusw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_SubtractScalar,                              "SubtractScalar",                                   SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_subsd},             HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
@@ -238,11 +238,11 @@ HARDWARE_INTRINSIC(SSE3_IsSupported,                                 "get_IsSupp
 HARDWARE_INTRINSIC(SSE3_AddSubtract,                                 "AddSubtract",                                      SSE3,       -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_addsubps,  INS_addsubpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE3_HorizontalAdd,                               "HorizontalAdd",                                    SSE3,       -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_haddps,    INS_haddpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE3_HorizontalSubtract,                          "HorizontalSubtract",                               SSE3,       -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_hsubps,    INS_hsubpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE3_LoadAndDuplicateToVector128,                 "LoadAndDuplicateToVector128",                      SSE3,       -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_lddqu,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movddup},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE3_LoadDquVector128,                            "LoadDquVector128",                                 SSE3,       -1,           16,           1,           {INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE3_MoveAndDuplicate,                            "MoveAndDuplicate",                                 SSE3,       -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movddup},           HW_Category_SimpleSIMD,                        HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE3_MoveHighAndDuplicate,                        "MoveHighAndDuplicate",                             SSE3,       -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movshdup,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate,                         "MoveLowAndDuplicate",                              SSE3,       -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsldup,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE3_LoadAndDuplicateToVector128,                 "LoadAndDuplicateToVector128",                      SSE3,       -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_lddqu,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movddup},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3_LoadDquVector128,                            "LoadDquVector128",                                 SSE3,       -1,           16,           1,           {INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3_MoveAndDuplicate,                            "MoveAndDuplicate",                                 SSE3,       -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movddup},           HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3_MoveHighAndDuplicate,                        "MoveHighAndDuplicate",                             SSE3,       -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movshdup,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate,                         "MoveLowAndDuplicate",                              SSE3,       -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsldup,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 
 // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                  Intrinsic ID                                      Function name                                      ISA        ival       SIMD size       NumArg                                                                   instructions                                                                                              Category                                         Flags
@@ -250,7 +250,7 @@ HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate,                         "MoveLowAnd
 // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSSE3 Intrinsics
 HARDWARE_INTRINSIC(SSSE3_IsSupported,                                "get_IsSupported",                                  SSSE3,      -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSSE3_Abs,                                        "Abs",                                              SSSE3,      -1,           16,           1,           {INS_invalid,   INS_pabsb,     INS_invalid,   INS_pabsw,     INS_invalid,   INS_pabsd,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3_Abs,                                        "Abs",                                              SSSE3,      -1,           16,           1,           {INS_invalid,   INS_pabsb,     INS_invalid,   INS_pabsw,     INS_invalid,   INS_pabsd,     INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSSE3_AlignRight,                                 "AlignRight",                                       SSSE3,      -1,           16,           3,           {INS_palignr,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSSE3_HorizontalAdd,                              "HorizontalAdd",                                    SSSE3,      -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_phaddw,    INS_invalid,   INS_phaddd,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSSE3_HorizontalAddSaturate,                      "HorizontalAddSaturate",                            SSSE3,      -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_phaddsw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
@@ -269,34 +269,34 @@ HARDWARE_INTRINSIC(SSSE3_Sign,                                       "Sign",
 HARDWARE_INTRINSIC(SSE41_IsSupported,                                "get_IsSupported",                                  SSE41,      -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_Blend,                                      "Blend",                                            SSE41,      -1,           16,           3,           {INS_invalid,   INS_invalid,   INS_pblendw,   INS_pblendw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_blendps,   INS_blendpd},          HW_Category_IMM,                               HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE41_BlendVariable,                              "BlendVariable",                                    SSE41,      -1,           16,           3,           {INS_pblendvb,  INS_pblendvb,  INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_blendvps,  INS_blendvpd},         HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE41_Ceiling,                                    "Ceiling",                                          SSE41,      10,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_Ceiling,                                    "Ceiling",                                          SSE41,      10,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_CeilingScalar,                              "CeilingScalar",                                    SSE41,      10,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE41_CompareEqual,                               "CompareEqual",                                     SSE41,      -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_pcmpeqq,   INS_pcmpeqq,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16,                    "ConvertToVector128Int16",                          SSE41,      -1,           16,           1,           {INS_pmovsxbw,  INS_pmovzxbw,  INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32,                    "ConvertToVector128Int32",                          SSE41,      -1,           16,           1,           {INS_pmovsxbd,  INS_pmovzxbd,  INS_pmovsxwd,  INS_pmovzxwd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64,                    "ConvertToVector128Int64",                          SSE41,      -1,           16,           1,           {INS_pmovsxbq,  INS_pmovzxbq,  INS_pmovsxwq,  INS_pmovzxwq,   INS_pmovsxdq,  INS_pmovzxdq,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16,                    "ConvertToVector128Int16",                          SSE41,      -1,           16,           1,           {INS_pmovsxbw,  INS_pmovzxbw,  INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32,                    "ConvertToVector128Int32",                          SSE41,      -1,           16,           1,           {INS_pmovsxbd,  INS_pmovzxbd,  INS_pmovsxwd,  INS_pmovzxwd,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64,                    "ConvertToVector128Int64",                          SSE41,      -1,           16,           1,           {INS_pmovsxbq,  INS_pmovzxbq,  INS_pmovsxwq,  INS_pmovzxwq,   INS_pmovsxdq,  INS_pmovzxdq,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoContainment|HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_DotProduct,                                 "DotProduct",                                       SSE41,      -1,           16,           3,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_dpps,      INS_dppd},             HW_Category_IMM,                               HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE41_Floor,                                      "Floor",                                            SSE41,       9,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_Floor,                                      "Floor",                                            SSE41,       9,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_FloorScalar,                                "FloorScalar",                                      SSE41,       9,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_LoadAlignedVector128NonTemporal,            "LoadAlignedVector128NonTemporal",                  SSE41,      -1,           16,           1,           {INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,   INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_invalid,   INS_invalid},          HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_LoadAlignedVector128NonTemporal,            "LoadAlignedVector128NonTemporal",                  SSE41,      -1,           16,           1,           {INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,   INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_invalid,   INS_invalid},          HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_Max,                                        "Max",                                              SSE41,      -1,           16,           2,           {INS_pmaxsb,    INS_invalid,   INS_invalid,   INS_pmaxuw,     INS_pmaxsd,    INS_pmaxud,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_Min,                                        "Min",                                              SSE41,      -1,           16,           2,           {INS_pminsb,    INS_invalid,   INS_invalid,   INS_pminuw,     INS_pminsd,    INS_pminud,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE41_MinHorizontal,                              "MinHorizontal",                                    SSE41,      -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_phminposuw, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_MinHorizontal,                              "MinHorizontal",                                    SSE41,      -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_phminposuw, INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_MultipleSumAbsoluteDifferences,             "MultipleSumAbsoluteDifferences",                   SSE41,      -1,           16,           3,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_mpsadbw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_IMM,                               HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE41_Multiply,                                   "Multiply",                                         SSE41,      -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_pmuldq,    INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE41_MultiplyLow,                                "MultiplyLow",                                      SSE41,      -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_pmulld,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_PackUnsignedSaturate,                       "PackUnsignedSaturate",                             SSE41,      -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_packusdw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE41_RoundCurrentDirection,                      "RoundCurrentDirection",                            SSE41,       4,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_RoundCurrentDirection,                      "RoundCurrentDirection",                            SSE41,       4,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_RoundCurrentDirectionScalar,                "RoundCurrentDirectionScalar",                      SSE41,       4,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_RoundToNearestInteger,                      "RoundToNearestInteger",                            SSE41,       8,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_RoundToNearestInteger,                      "RoundToNearestInteger",                            SSE41,       8,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_RoundToNearestIntegerScalar,                "RoundToNearestIntegerScalar",                      SSE41,       8,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_RoundToNegativeInfinity,                    "RoundToNegativeInfinity",                          SSE41,       9,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_RoundToNegativeInfinity,                    "RoundToNegativeInfinity",                          SSE41,       9,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_RoundToNegativeInfinityScalar,              "RoundToNegativeInfinityScalar",                    SSE41,       9,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinity,                    "RoundToPositiveInfinity",                          SSE41,      10,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinity,                    "RoundToPositiveInfinity",                          SSE41,      10,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinityScalar,              "RoundToPositiveInfinityScalar",                    SSE41,      10,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_RoundToZero,                                "RoundToZero",                                      SSE41,      11,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_RoundToZero,                                "RoundToZero",                                      SSE41,      11,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundps,   INS_roundpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_RoundToZeroScalar,                          "RoundToZeroScalar",                                SSE41,      11,           16,          -1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_roundss,   INS_roundsd},          HW_Category_Special,                           HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_TestAllOnes,                                "TestAllOnes",                                      SSE41,      -1,           16,           1,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE41_TestAllOnes,                                "TestAllOnes",                                      SSE41,      -1,           16,           1,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_TestAllZeros,                               "TestAllZeros",                                     SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(SSE41_TestC,                                      "TestC",                                            SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
 HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros,                           "TestMixOnesZeros",                                 SSE41,      -1,           16,           2,           {INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,      INS_ptest,     INS_ptest,     INS_ptest,     INS_ptest,     INS_invalid,   INS_invalid},          HW_Category_SimpleSIMD,                        HW_Flag_MultiIns|HW_Flag_NoContainment)
@@ -321,26 +321,26 @@ HARDWARE_INTRINSIC(AVX_AddSubtract,                                  "AddSubtrac
 HARDWARE_INTRINSIC(AVX_BlendVariable,                                "BlendVariable",                                    AVX,        -1,           32,           3,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_vblendvps, INS_vblendvpd},         HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_Compare,                                      "Compare",                                          AVX,        -1,           32,           3,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpps,     INS_cmppd},             HW_Category_IMM,                               HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_CompareScalar,                                "CompareScalar",                                    AVX,        -1,           16,           3,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cmpss,     INS_cmpsd},             HW_Category_IMM,                               HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX_ConvertToSingle,                              "ConvertToSingle",                                  AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX_ConvertToSingle,                              "ConvertToSingle",                                  AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_Divide,                                       "Divide",                                           AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divps,     INS_divpd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_DuplicateEvenIndexed,                         "DuplicateEvenIndexed",                             AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsldup,  INS_movddup},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_DuplicateOddIndexed,                          "DuplicateOddIndexed",                              AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movshdup,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX_DuplicateEvenIndexed,                         "DuplicateEvenIndexed",                             AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movsldup,  INS_movddup},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_DuplicateOddIndexed,                          "DuplicateOddIndexed",                              AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movshdup,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_HorizontalAdd,                                "HorizontalAdd",                                    AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_haddps,    INS_haddpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_HorizontalSubtract,                           "HorizontalSubtract",                               AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_hsubps,    INS_hsubpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_LoadAlignedVector256,                         "LoadAlignedVector256",                             AVX,        -1,           32,           1,           {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movaps,    INS_movapd},            HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_LoadDquVector256,                             "LoadDquVector256",                                 AVX,        -1,           32,           1,           {INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_LoadVector256,                                "LoadVector256",                                    AVX,        -1,           32,           1,           {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movups,    INS_movupd},            HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX_LoadAlignedVector256,                         "LoadAlignedVector256",                             AVX,        -1,           32,           1,           {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movaps,    INS_movapd},            HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_LoadDquVector256,                             "LoadDquVector256",                                 AVX,        -1,           32,           1,           {INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_lddqu,     INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_LoadVector256,                                "LoadVector256",                                    AVX,        -1,           32,           1,           {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movups,    INS_movupd},            HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_Max,                                          "Max",                                              AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_maxps,     INS_maxpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX_Min,                                          "Min",                                              AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_minps,     INS_minpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX_Multiply,                                     "Multiply",                                         AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mulps,     INS_mulpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX_Or,                                           "Or",                                               AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_orps,      INS_orpd},              HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_Reciprocal,                                   "Reciprocal",                                       AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rcpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_ReciprocalSqrt,                               "ReciprocalSqrt",                                   AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rsqrtps,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_SetZeroVector256,                             "SetZeroVector256",                                 AVX,        -1,           32,           0,           {INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_xorps,     INS_xorpd},             HW_Category_Helper,                            HW_Flag_OneTypeGeneric)
-HARDWARE_INTRINSIC(AVX_Sqrt,                                         "Sqrt",                                             AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtps,    INS_sqrtpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_Store,                                        "Store",                                            AVX,        -1,           32,           2,           {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movups,    INS_movupd},            HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_StoreAligned,                                 "StoreAligned",                                     AVX,        -1,           32,           2,           {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movaps,    INS_movapd},            HW_Category_MemoryStore,                       HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_StoreAlignedNonTemporal,                      "StoreAlignedNonTemporal",                          AVX,        -1,           32,           2,           {INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntps,   INS_movntpd},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX_Reciprocal,                                   "Reciprocal",                                       AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rcpps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_ReciprocalSqrt,                               "ReciprocalSqrt",                                   AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_rsqrtps,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_SetZeroVector256,                             "SetZeroVector256",                                 AVX,        -1,           32,           0,           {INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_pxor,      INS_xorps,     INS_xorpd},             HW_Category_Helper,                            HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_Sqrt,                                         "Sqrt",                                             AVX,        -1,           32,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtps,    INS_sqrtpd},            HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_Store,                                        "Store",                                            AVX,        -1,           32,           2,           {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movups,    INS_movupd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_StoreAligned,                                 "StoreAligned",                                     AVX,        -1,           32,           2,           {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movaps,    INS_movapd},            HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX_StoreAlignedNonTemporal,                      "StoreAlignedNonTemporal",                          AVX,        -1,           32,           2,           {INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntdq,   INS_movntps,   INS_movntpd},           HW_Category_MemoryStore,                       HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX_Subtract,                                     "Subtract",                                         AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_subps,     INS_subpd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_UnpackHigh,                                   "UnpackHigh",                                       AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_unpckhps,  INS_unpckhpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_UnpackLow,                                    "UnpackLow",                                        AVX,        -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_unpcklps,  INS_unpcklpd},          HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
@@ -348,7 +348,7 @@ HARDWARE_INTRINSIC(AVX_Xor,                                          "Xor",
 
 //  AVX2 Intrinsics
 HARDWARE_INTRINSIC(AVX2_IsSupported,                                 "get_IsSupported",                                  AVX2,       -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_Abs,                                         "Abs",                                              AVX2,       -1,           32,           1,           {INS_pabsb,     INS_pabsb,     INS_pabsw,     INS_pabsw,     INS_pabsd,     INS_pabsd,     INS_paddq,     INS_paddq,     INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_Abs,                                         "Abs",                                              AVX2,       -1,           32,           1,           {INS_pabsb,     INS_pabsb,     INS_pabsw,     INS_pabsw,     INS_pabsd,     INS_pabsd,     INS_paddq,     INS_paddq,     INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2_Add,                                         "Add",                                              AVX2,       -1,           32,           2,           {INS_paddb,     INS_paddb,     INS_paddw,     INS_paddw,     INS_paddd,     INS_paddd,     INS_paddq,     INS_paddq,     INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_AddSaturate,                                 "AddSaturate",                                      AVX2,       -1,           32,           2,           {INS_paddsb,    INS_paddusb,   INS_paddsw,    INS_paddusw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_And,                                         "And",                                              AVX2,       -1,           32,           2,           {INS_pand,      INS_pand,      INS_pand,      INS_pand,      INS_pand,      INS_pand,      INS_pand,      INS_pand,      INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
@@ -361,7 +361,7 @@ HARDWARE_INTRINSIC(AVX2_HorizontalAdd,                               "Horizontal
 HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate,                       "HorizontalAddSaturate",                            AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_phaddsw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_HorizontalSubtract,                          "HorizontalSubtract",                               AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_phsubw,    INS_invalid,   INS_phsubd,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_HorizontalSubtractSaturate,                  "HorizontalSubtractSaturate",                       AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_phsubsw,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal,             "LoadAlignedVector256NonTemporal",                  AVX2,       -1,           32,           1,           {INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal,             "LoadAlignedVector256NonTemporal",                  AVX2,       -1,           32,           1,           {INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_movntdqa,  INS_invalid,   INS_invalid},           HW_Category_MemoryLoad,                        HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2_Multiply,                                    "Multiply",                                         AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pmuldq,    INS_pmuludq,   INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Or,                                          "Or",                                               AVX2,       -1,           32,           2,           {INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_por,       INS_invalid,   INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical,                            "ShiftLeftLogical",                                 AVX2,       -1,           32,           2,           {INS_invalid,   INS_invalid,   INS_psllw,     INS_psllw,     INS_pslld,     INS_pslld,     INS_psllq,     INS_psllq,     INS_invalid,   INS_invalid},           HW_Category_IMM,                               HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
@@ -391,14 +391,14 @@ HARDWARE_INTRINSIC(FMA_IsSupported,                                  "get_IsSupp
 
 //  LZCNT Intrinsics
 HARDWARE_INTRINSIC(LZCNT_IsSupported,                                "get_IsSupported",                                  LZCNT,      -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount,                           "LeadingZeroCount",                                 LZCNT,      -1,           0,            1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_lzcnt,     INS_invalid,   INS_lzcnt,     INS_invalid,   INS_invalid},           HW_Category_Scalar,                            HW_Flag_NoFloatingPointUsed)
+HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount,                           "LeadingZeroCount",                                 LZCNT,      -1,           0,            1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_lzcnt,     INS_invalid,   INS_lzcnt,     INS_invalid,   INS_invalid},           HW_Category_Scalar,                            HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 
 //  PCLMULQDQ Intrinsics
 HARDWARE_INTRINSIC(PCLMULQDQ_IsSupported,                            "get_IsSupported",                                  PCLMULQDQ,  -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
 
 //  POPCNT Intrinsics
 HARDWARE_INTRINSIC(POPCNT_IsSupported,                               "get_IsSupported",                                  POPCNT,     -1,           0,            0,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_IsSupportedProperty,               HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(POPCNT_PopCount,                                  "PopCount",                                         POPCNT,     -1,           0,            1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_popcnt,    INS_invalid,   INS_popcnt,    INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Scalar,                            HW_Flag_NoFloatingPointUsed)
+HARDWARE_INTRINSIC(POPCNT_PopCount,                                  "PopCount",                                         POPCNT,     -1,           0,            1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_popcnt,    INS_invalid,   INS_popcnt,    INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Scalar,                            HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 #endif // FEATURE_HW_INTRINSIC
 
 #undef HARDWARE_INTRINSIC
index e5a74fd..d60d213 100644 (file)
@@ -2253,14 +2253,18 @@ void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
 
 void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
 {
-    TreeNodeInfo*  info        = currentNodeInfo;
-    NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
-    InstructionSet isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
-    int            numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID);
+    TreeNodeInfo*       info        = currentNodeInfo;
+    NamedIntrinsic      intrinsicID = intrinsicTree->gtHWIntrinsicId;
+    InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
+    HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
+    HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
+    int                 numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID);
+
     if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2)
     {
         SetContainsAVXFlags(true, 32);
     }
+
     GenTree* op1   = intrinsicTree->gtOp.gtOp1;
     GenTree* op2   = intrinsicTree->gtOp.gtOp2;
     info->srcCount = 0;
@@ -2285,8 +2289,7 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
         info->srcCount += GetOperandInfo(op2);
     }
 
-    if (Compiler::categoryOfHWIntrinsic(intrinsicID) == HW_Category_IMM &&
-        (Compiler::flagsOfHWIntrinsic(intrinsicID) & HW_Flag_NoJmpTableIMM) == 0)
+    if ((category == HW_Category_IMM) && ((flags & HW_Flag_NoJmpTableIMM) == 0))
     {
         GenTree* lastOp = Compiler::lastOpOfHWIntrinsic(intrinsicTree, numArgs);
         assert(lastOp != nullptr);
@@ -2303,6 +2306,32 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
         }
     }
 
+    if (!compiler->canUseVexEncoding())
+    {
+        // On machines without VEX support, we sometimes have to inject an intermediate
+        // `movaps targetReg, op1Reg` in order to maintain the correct behavior. This
+        // becomes a problem if `op2Reg == targetReg` since that means we will overwrite
+        // op2. In order to resolve this, we currently mark the second operand as delay free.
+
+        if ((flags & HW_Flag_NoRMWSemantics) == 0)
+        {
+            assert(category != HW_Category_MemoryLoad);
+            assert(category != HW_Category_MemoryStore);
+
+            assert((flags & HW_Flag_NoCodeGen) == 0);
+
+            assert(numArgs != 0);
+            assert(numArgs != 1);
+
+            if (info->srcCount >= 2)
+            {
+                LocationInfoListNode* op2Info = useList.Begin()->Next();
+                op2Info->info.isDelayFree     = true;
+                info->hasDelayFreeSrc         = true;
+            }
+        }
+    }
+
     switch (intrinsicID)
     {
         case NI_SSE_CompareEqualOrderedScalar:
index cf73932..7ad997e 100644 (file)
@@ -88,6 +88,10 @@ enum HWIntrinsicFlag : unsigned int
     // NoJmpTable IMM
     // the imm intrinsic does not need jumptable fallback when it gets non-const argument
     HW_Flag_NoJmpTableIMM = 0x2000,
+
+    // No Read/Modify/Write Semantics
+    // the intrinsic does not have read/modify/write semantics and doesn't need
+    HW_Flag_NoRMWSemantics = 0x4000,
 };
 
 inline HWIntrinsicFlag operator|(HWIntrinsicFlag c1, HWIntrinsicFlag c2)