Update the x86 hwintrinsic list to match the arm64 layout (#35364)
authorTanner Gooding <tagoo@outlook.com>
Fri, 24 Apr 2020 18:16:17 +0000 (11:16 -0700)
committerGitHub <noreply@github.com>
Fri, 24 Apr 2020 18:16:17 +0000 (11:16 -0700)
* Update the x86 hwintrinsic list to match the arm64 layout

* Applying formatting patch

src/coreclr/src/jit/gentree.cpp
src/coreclr/src/jit/hwintrinsic.cpp
src/coreclr/src/jit/hwintrinsic.h
src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/src/jit/hwintrinsiclistxarch.h
src/coreclr/src/jit/hwintrinsicxarch.cpp
src/coreclr/src/jit/namedintrinsiclist.h
src/coreclr/src/jit/valuenumfuncs.h

index 1a0b7d0..bd8e7e4 100644 (file)
@@ -17128,7 +17128,7 @@ GenTree* Compiler::gtGetSIMDZero(var_types simdType, var_types baseType, CORINFO
                     // We only return the HWIntrinsicNode if SSE is supported, since it is possible for
                     // the user to disable the SSE HWIntrinsic support via the COMPlus configuration knobs
                     // even though the hardware vector types are still available.
-                    return gtNewSimdHWIntrinsicNode(simdType, NI_Vector128_Zero, baseType, size);
+                    return gtNewSimdHWIntrinsicNode(simdType, NI_Vector128_get_Zero, baseType, size);
                 }
                 return nullptr;
             case TYP_SIMD32:
@@ -17137,7 +17137,7 @@ GenTree* Compiler::gtGetSIMDZero(var_types simdType, var_types baseType, CORINFO
                     // We only return the HWIntrinsicNode if AVX is supported, since it is possible for
                     // the user to disable the AVX HWIntrinsic support via the COMPlus configuration knobs
                     // even though the hardware vector types are still available.
-                    return gtNewSimdHWIntrinsicNode(simdType, NI_Vector256_Zero, baseType, size);
+                    return gtNewSimdHWIntrinsicNode(simdType, NI_Vector256_get_Zero, baseType, size);
                 }
                 return nullptr;
             default:
index 37ee480..e376d06 100644 (file)
@@ -10,8 +10,8 @@
 static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
 // clang-format off
 #if defined(TARGET_XARCH)
-#define HARDWARE_INTRINSIC(id, name, isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
-    {NI_##id, name, InstructionSet_##isa, static_cast<int>(ival), static_cast<unsigned>(size), numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
+#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
+    {NI_##isa##_##name, #name, InstructionSet_##isa, static_cast<unsigned>(size), numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
 #include "hwintrinsiclistxarch.h"
 #elif defined (TARGET_ARM64)
 #define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
index 62082ad..91b2eb0 100644 (file)
@@ -223,6 +223,30 @@ enum class FloatComparisonMode : unsigned char
     // _CMP_TRUE_US
     UnorderedTrueSignaling = 31,
 };
+
+enum class FloatRoundingMode : unsigned char
+{
+    // _MM_FROUND_TO_NEAREST_INT
+    ToNearestInteger = 0x00,
+
+    // _MM_FROUND_TO_NEG_INF
+    ToNegativeInfinity = 0x01,
+
+    // _MM_FROUND_TO_POS_INF
+    ToPositiveInfinity = 0x02,
+
+    // _MM_FROUND_TO_ZERO
+    ToZero = 0x03,
+
+    // _MM_FROUND_CUR_DIRECTION
+    CurrentDirection = 0x04,
+
+    // _MM_FROUND_RAISE_EXC
+    RaiseException = 0x00,
+
+    // _MM_FROUND_NO_EXC
+    NoException = 0x08,
+};
 #endif // TARGET_XARCH
 
 struct HWIntrinsicInfo
@@ -230,14 +254,11 @@ struct HWIntrinsicInfo
     NamedIntrinsic         id;
     const char*            name;
     CORINFO_InstructionSet isa;
-#ifdef TARGET_XARCH
-    int ival;
-#endif
-    unsigned            simdSize;
-    int                 numArgs;
-    instruction         ins[10];
-    HWIntrinsicCategory category;
-    HWIntrinsicFlag     flags;
+    unsigned               simdSize;
+    int                    numArgs;
+    instruction            ins[10];
+    HWIntrinsicCategory    category;
+    HWIntrinsicFlag        flags;
 
     static const HWIntrinsicInfo& lookup(NamedIntrinsic id);
 
@@ -289,7 +310,162 @@ struct HWIntrinsicInfo
 #ifdef TARGET_XARCH
     static int lookupIval(NamedIntrinsic id)
     {
-        return lookup(id).ival;
+        switch (id)
+        {
+            case NI_SSE_CompareEqual:
+            case NI_SSE_CompareScalarEqual:
+            case NI_SSE2_CompareEqual:
+            case NI_SSE2_CompareScalarEqual:
+            case NI_AVX_CompareEqual:
+            {
+                return static_cast<int>(FloatComparisonMode::OrderedEqualNonSignaling);
+            }
+
+            case NI_SSE_CompareGreaterThan:
+            case NI_SSE_CompareScalarGreaterThan:
+            case NI_SSE2_CompareGreaterThan:
+            case NI_SSE2_CompareScalarGreaterThan:
+            case NI_SSE_CompareLessThan:
+            case NI_SSE_CompareScalarLessThan:
+            case NI_SSE2_CompareLessThan:
+            case NI_SSE2_CompareScalarLessThan:
+            case NI_AVX_CompareLessThan:
+            {
+                return static_cast<int>(FloatComparisonMode::OrderedLessThanSignaling);
+            }
+
+            case NI_SSE_CompareGreaterThanOrEqual:
+            case NI_SSE_CompareScalarGreaterThanOrEqual:
+            case NI_SSE2_CompareGreaterThanOrEqual:
+            case NI_SSE2_CompareScalarGreaterThanOrEqual:
+            case NI_SSE_CompareLessThanOrEqual:
+            case NI_SSE_CompareScalarLessThanOrEqual:
+            case NI_SSE2_CompareLessThanOrEqual:
+            case NI_SSE2_CompareScalarLessThanOrEqual:
+            case NI_AVX_CompareLessThanOrEqual:
+            {
+                return static_cast<int>(FloatComparisonMode::OrderedLessThanOrEqualSignaling);
+            }
+
+            case NI_SSE_CompareNotEqual:
+            case NI_SSE_CompareScalarNotEqual:
+            case NI_SSE2_CompareNotEqual:
+            case NI_SSE2_CompareScalarNotEqual:
+            case NI_AVX_CompareNotEqual:
+            {
+                return static_cast<int>(FloatComparisonMode::UnorderedNotEqualNonSignaling);
+            }
+
+            case NI_SSE_CompareNotGreaterThan:
+            case NI_SSE_CompareScalarNotGreaterThan:
+            case NI_SSE2_CompareNotGreaterThan:
+            case NI_SSE2_CompareScalarNotGreaterThan:
+            case NI_SSE_CompareNotLessThan:
+            case NI_SSE_CompareScalarNotLessThan:
+            case NI_SSE2_CompareNotLessThan:
+            case NI_SSE2_CompareScalarNotLessThan:
+            case NI_AVX_CompareNotLessThan:
+            {
+                return static_cast<int>(FloatComparisonMode::UnorderedNotLessThanSignaling);
+            }
+
+            case NI_SSE_CompareNotGreaterThanOrEqual:
+            case NI_SSE_CompareScalarNotGreaterThanOrEqual:
+            case NI_SSE2_CompareNotGreaterThanOrEqual:
+            case NI_SSE2_CompareScalarNotGreaterThanOrEqual:
+            case NI_SSE_CompareNotLessThanOrEqual:
+            case NI_SSE_CompareScalarNotLessThanOrEqual:
+            case NI_SSE2_CompareNotLessThanOrEqual:
+            case NI_SSE2_CompareScalarNotLessThanOrEqual:
+            case NI_AVX_CompareNotLessThanOrEqual:
+            {
+                return static_cast<int>(FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling);
+            }
+
+            case NI_SSE_CompareOrdered:
+            case NI_SSE_CompareScalarOrdered:
+            case NI_SSE2_CompareOrdered:
+            case NI_SSE2_CompareScalarOrdered:
+            case NI_AVX_CompareOrdered:
+            {
+                return static_cast<int>(FloatComparisonMode::OrderedNonSignaling);
+            }
+
+            case NI_SSE_CompareUnordered:
+            case NI_SSE_CompareScalarUnordered:
+            case NI_SSE2_CompareUnordered:
+            case NI_SSE2_CompareScalarUnordered:
+            case NI_AVX_CompareUnordered:
+            {
+                return static_cast<int>(FloatComparisonMode::UnorderedNonSignaling);
+            }
+
+            case NI_SSE41_Ceiling:
+            case NI_SSE41_CeilingScalar:
+            case NI_SSE41_RoundToPositiveInfinity:
+            case NI_SSE41_RoundToPositiveInfinityScalar:
+            case NI_AVX_Ceiling:
+            case NI_AVX_RoundToPositiveInfinity:
+            {
+                return static_cast<int>(FloatRoundingMode::ToPositiveInfinity);
+            }
+
+            case NI_SSE41_Floor:
+            case NI_SSE41_FloorScalar:
+            case NI_SSE41_RoundToNegativeInfinity:
+            case NI_SSE41_RoundToNegativeInfinityScalar:
+            case NI_AVX_Floor:
+            case NI_AVX_RoundToNegativeInfinity:
+            {
+                return static_cast<int>(FloatRoundingMode::ToNegativeInfinity);
+            }
+
+            case NI_SSE41_RoundCurrentDirection:
+            case NI_SSE41_RoundCurrentDirectionScalar:
+            case NI_AVX_RoundCurrentDirection:
+            {
+                return static_cast<int>(FloatRoundingMode::CurrentDirection);
+            }
+
+            case NI_SSE41_RoundToNearestInteger:
+            case NI_SSE41_RoundToNearestIntegerScalar:
+            case NI_AVX_RoundToNearestInteger:
+            {
+                return static_cast<int>(FloatRoundingMode::ToNearestInteger);
+            }
+
+            case NI_SSE41_RoundToZero:
+            case NI_SSE41_RoundToZeroScalar:
+            case NI_AVX_RoundToZero:
+            {
+                return static_cast<int>(FloatRoundingMode::ToZero);
+            }
+
+            case NI_AVX_CompareGreaterThan:
+            {
+                return static_cast<int>(FloatComparisonMode::OrderedGreaterThanSignaling);
+            }
+
+            case NI_AVX_CompareGreaterThanOrEqual:
+            {
+                return static_cast<int>(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling);
+            }
+
+            case NI_AVX_CompareNotGreaterThan:
+            {
+                return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanSignaling);
+            }
+
+            case NI_AVX_CompareNotGreaterThanOrEqual:
+            {
+                return static_cast<int>(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling);
+            }
+
+            default:
+            {
+                return -1;
+            }
+        }
     }
 #endif
 
index c01273b..240244f 100644 (file)
@@ -1231,8 +1231,8 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
             break;
         }
 
-        case NI_Vector128_Zero:
-        case NI_Vector256_Zero:
+        case NI_Vector128_get_Zero:
+        case NI_Vector256_get_Zero:
         {
             assert(op1 == nullptr);
             emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
index e3a81f4..c2b1aee 100644 (file)
     9) Each intrinsic has one or more flags with type of `enum HWIntrinsicFlag`
 */
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  Vector128 Intrinsics
-HARDWARE_INTRINSIC(Vector128_As,                                    "As",                                    Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsByte,                                "AsByte",                                Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsDouble,                              "AsDouble",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsInt16,                               "AsInt16",                               Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsInt32,                               "AsInt32",                               Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsInt64,                               "AsInt64",                               Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsSByte,                               "AsSByte",                               Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsSingle,                              "AsSingle",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsUInt16,                              "AsUInt16",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsUInt32,                              "AsUInt32",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsUInt64,                              "AsUInt64",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsVector,                              "AsVector",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsVector2,                             "AsVector2",                             Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsVector3,                             "AsVector3",                             Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsVector4,                             "AsVector4",                             Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_AsVector128,                           "AsVector128",                           Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_Count,                                 "get_Count",                             Vector128,          -1,              16,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_CreateScalarUnsafe,                    "CreateScalarUnsafe",                    Vector128,          -1,              16,           1,     {INS_mov_i2xmm,         INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_movss,          INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_GetElement,                            "GetElement",                            Vector128,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector128_WithElement,                           "WithElement",                           Vector128,          -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector128_ToScalar,                              "ToScalar",                              Vector128,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_ToVector256,                           "ToVector256",                           Vector128,          -1,              16,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_ToVector256Unsafe,                     "ToVector256Unsafe",                     Vector128,          -1,              16,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector128_Zero,                                  "get_Zero",                              Vector128,          -1,              16,           0,     {INS_xorps,             INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-
-// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+HARDWARE_INTRINSIC(Vector128,       As,                                         16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsByte,                                     16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsDouble,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsInt16,                                    16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsInt32,                                    16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsInt64,                                    16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsSByte,                                    16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsSingle,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsUInt16,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsUInt32,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsUInt64,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsVector,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsVector2,                                  16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsVector3,                                  16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsVector4,                                  16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       AsVector128,                                16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       CreateScalarUnsafe,                         16,             1,      {INS_mov_i2xmm,         INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       get_Count,                                  16,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       get_Zero,                                   16,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       GetElement,                                 16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector128,       WithElement,                                16,             3,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector128,       ToScalar,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       ToVector256,                                16,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       ToVector256Unsafe,                          16,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  Vector256 Intrinsics
-HARDWARE_INTRINSIC(Vector256_As,                                    "As",                                    Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsByte,                                "AsByte",                                Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsDouble,                              "AsDouble",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsInt16,                               "AsInt16",                               Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsInt32,                               "AsInt32",                               Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsInt64,                               "AsInt64",                               Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsSByte,                               "AsSByte",                               Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsSingle,                              "AsSingle",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsUInt16,                              "AsUInt16",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsUInt32,                              "AsUInt32",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsUInt64,                              "AsUInt64",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsVector,                              "AsVector",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_AsVector256,                           "AsVector256",                           Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_Count,                                 "get_Count",                             Vector256,          -1,              32,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_CreateScalarUnsafe,                    "CreateScalarUnsafe",                    Vector256,          -1,              32,           1,     {INS_mov_i2xmm,         INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_mov_i2xmm,      INS_movss,          INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_GetElement,                            "GetElement",                            Vector256,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector256_WithElement,                           "WithElement",                           Vector256,          -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector256_GetLower,                              "GetLower",                              Vector256,          -1,              32,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_ToScalar,                              "ToScalar",                              Vector256,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(Vector256_Zero,                                  "get_Zero",                              Vector256,          -1,              32,           0,     {INS_xorps,             INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps,          INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-
-// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+HARDWARE_INTRINSIC(Vector256,       As,                                         32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsByte,                                     32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsDouble,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsInt16,                                    32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsInt32,                                    32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsInt64,                                    32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsSByte,                                    32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsSingle,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsUInt16,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsUInt32,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsUInt64,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsVector,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       AsVector256,                                32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       get_Count,                                  32,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       get_Zero,                                   32,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       CreateScalarUnsafe,                         32,             1,      {INS_mov_i2xmm,         INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       GetElement,                                 32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector256,       WithElement,                                32,             3,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(Vector256,       GetLower,                                   32,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       ToScalar,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE Intrinsics
-HARDWARE_INTRINSIC(SSE_Add,                                         "Add",                                          SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_AddScalar,                                   "AddScalar",                                    SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_And,                                         "And",                                          SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_andps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_AndNot,                                      "AndNot",                                       SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_andnps,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareEqual,                                "CompareEqual",                                 SSE, FloatComparisonMode::OrderedEqualNonSignaling,             16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareScalarOrderedEqual,                   "CompareScalarOrderedEqual",                    SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comiss,         INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareScalarEqual,                          "CompareScalarEqual",                           SSE, FloatComparisonMode::OrderedEqualNonSignaling,             16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedEqual,                 "CompareScalarUnorderedEqual",                  SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomiss,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThan,                          "CompareGreaterThan",                           SSE, FloatComparisonMode::OrderedLessThanSignaling,             16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(SSE_CompareScalarOrderedGreaterThan,             "CompareScalarOrderedGreaterThan",              SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comiss,         INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareScalarGreaterThan,                    "CompareScalarGreaterThan",                     SSE, FloatComparisonMode::OrderedLessThanSignaling,             16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedGreaterThan,           "CompareScalarUnorderedGreaterThan",            SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomiss,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual,                   "CompareGreaterThanOrEqual",                    SSE, FloatComparisonMode::OrderedLessThanOrEqualSignaling,      16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(SSE_CompareScalarOrderedGreaterThanOrEqual,      "CompareScalarOrderedGreaterThanOrEqual",       SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comiss,         INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareScalarGreaterThanOrEqual,             "CompareScalarGreaterThanOrEqual",              SSE, FloatComparisonMode::OrderedLessThanOrEqualSignaling,      16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedGreaterThanOrEqual,    "CompareScalarUnorderedGreaterThanOrEqual",     SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomiss,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareLessThan,                             "CompareLessThan",                              SSE, FloatComparisonMode::OrderedLessThanSignaling,             16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareScalarOrderedLessThan,                "CompareScalarOrderedLessThan",                 SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comiss,         INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareScalarLessThan,                       "CompareScalarLessThan",                        SSE, FloatComparisonMode::OrderedLessThanSignaling,             16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedLessThan,              "CompareScalarUnorderedLessThan",               SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomiss,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual,                      "CompareLessThanOrEqual",                       SSE, FloatComparisonMode::OrderedLessThanOrEqualSignaling,      16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareScalarOrderedLessThanOrEqual,         "CompareScalarOrderedLessThanOrEqual",          SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comiss,         INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareScalarLessThanOrEqual,                "CompareScalarLessThanOrEqual",                 SSE, FloatComparisonMode::OrderedLessThanOrEqualSignaling,      16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedLessThanOrEqual,       "CompareScalarUnorderedLessThanOrEqual",        SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomiss,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareNotEqual,                             "CompareNotEqual",                              SSE, FloatComparisonMode::UnorderedNotEqualNonSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareScalarOrderedNotEqual,                "CompareScalarOrderedNotEqual",                 SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comiss,         INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareScalarNotEqual,                       "CompareScalarNotEqual",                        SSE, FloatComparisonMode::UnorderedNotEqualNonSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareScalarUnorderedNotEqual,              "CompareScalarUnorderedNotEqual",               SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomiss,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan,                       "CompareNotGreaterThan",                        SSE, FloatComparisonMode::UnorderedNotLessThanSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(SSE_CompareScalarNotGreaterThan,                 "CompareScalarNotGreaterThan",                  SSE, FloatComparisonMode::UnorderedNotLessThanSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual,                "CompareNotGreaterThanOrEqual",                 SSE, FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling, 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(SSE_CompareScalarNotGreaterThanOrEqual,          "CompareScalarNotGreaterThanOrEqual",           SSE, FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling, 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotLessThan,                          "CompareNotLessThan",                           SSE, FloatComparisonMode::UnorderedNotLessThanSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareScalarNotLessThan,                    "CompareScalarNotLessThan",                     SSE, FloatComparisonMode::UnorderedNotLessThanSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqual,                   "CompareNotLessThanOrEqual",                    SSE, FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling, 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareScalarNotLessThanOrEqual,             "CompareScalarNotLessThanOrEqual",              SSE, FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling, 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareOrdered,                              "CompareOrdered",                               SSE, FloatComparisonMode::OrderedNonSignaling,                  16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareScalarOrdered,                        "CompareScalarOrdered",                         SSE, FloatComparisonMode::OrderedNonSignaling,                  16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareUnordered,                            "CompareUnordered",                             SSE, FloatComparisonMode::UnorderedNonSignaling,                16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareScalarUnordered,                      "CompareScalarUnordered",                       SSE, FloatComparisonMode::UnorderedNonSignaling,                16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32,                              "ConvertToInt32",                               SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtss2si,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single,              "ConvertScalarToVector128Single",               SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2ss,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation,                "ConvertToInt32WithTruncation",                 SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttss2si,      INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_Divide,                                      "Divide",                                       SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_DivideScalar,                                "DivideScalar",                                 SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_LoadAlignedVector128,                        "LoadAlignedVector128",                         SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movaps,         INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_LoadHigh,                                    "LoadHigh",                                     SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movhps,         INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_LoadLow,                                     "LoadLow",                                      SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movlps,         INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_LoadScalarVector128,                         "LoadScalarVector128",                          SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_LoadVector128,                               "LoadVector128",                                SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movups,         INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_Max,                                         "Max",                                          SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_maxps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_MaxScalar,                                   "MaxScalar",                                    SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_maxss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_Min,                                         "Min",                                          SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_minps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_MinScalar,                                   "MinScalar",                                    SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_minss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_MoveHighToLow,                               "MoveHighToLow",                                SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movhlps,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_MoveLowToHigh,                               "MoveLowToHigh",                                SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movlhps,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_MoveMask,                                    "MoveMask",                                     SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movmskps,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE_MoveScalar,                                  "MoveScalar",                                   SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE_Multiply,                                    "Multiply",                                     SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mulps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_MultiplyScalar,                              "MultiplyScalar",                               SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mulss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_Or,                                          "Or",                                           SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_orps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_Prefetch0,                                   "Prefetch0",                                    SSE,          -1,               0,           1,     {INS_invalid,           INS_prefetcht0,     INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_Prefetch1,                                   "Prefetch1",                                    SSE,          -1,               0,           1,     {INS_invalid,           INS_prefetcht1,     INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_Prefetch2,                                   "Prefetch2",                                    SSE,          -1,               0,           1,     {INS_invalid,           INS_prefetcht2,     INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_PrefetchNonTemporal,                         "PrefetchNonTemporal",                          SSE,          -1,               0,           1,     {INS_invalid,           INS_prefetchnta,    INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_Reciprocal,                                  "Reciprocal",                                   SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_rcpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ReciprocalScalar,                            "ReciprocalScalar",                             SSE,          -1,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_rcpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_ReciprocalSqrt,                              "ReciprocalSqrt",                               SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_rsqrtps,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ReciprocalSqrtScalar,                        "ReciprocalSqrtScalar",                         SSE,          -1,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_rsqrtss,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_Shuffle,                                     "Shuffle",                                      SSE,          -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_shufps,         INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE_Sqrt,                                        "Sqrt",                                         SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sqrtps,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_SqrtScalar,                                  "SqrtScalar",                                   SSE,          -1,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sqrtss,         INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_Store,                                       "Store",                                        SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movups,         INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE_StoreAligned,                                "StoreAligned",                                 SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movaps,         INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE_StoreAlignedNonTemporal,                     "StoreAlignedNonTemporal",                      SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movntps,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE_StoreFence,                                  "StoreFence",                                   SSE,          -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_StoreHigh,                                   "StoreHigh",                                    SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movhps,         INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE_StoreLow,                                    "StoreLow",                                     SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movlps,         INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE_StoreScalar,                                 "StoreScalar",                                  SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movss,          INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE_Subtract,                                    "Subtract",                                     SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_subps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SubtractScalar,                              "SubtractScalar",                               SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_subss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_UnpackHigh,                                  "UnpackHigh",                                   SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_unpckhps,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_UnpackLow,                                   "UnpackLow",                                    SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_unpcklps,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Xor,                                         "Xor",                                          SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_xorps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-
-// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+HARDWARE_INTRINSIC(SSE,             Add,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE,             AddScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             And,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE,             AndNot,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andnps,             INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             CompareEqual,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareScalarEqual,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedEqual,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareGreaterThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedGreaterThan,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareScalarGreaterThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedGreaterThan,          16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareGreaterThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedGreaterThanOrEqual,     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareScalarGreaterThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedGreaterThanOrEqual,   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareLessThan,                            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedLessThan,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareScalarLessThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedLessThan,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareLessThanOrEqual,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedLessThanOrEqual,        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareScalarLessThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedLessThanOrEqual,      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareNotEqual,                            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE,             CompareScalarOrderedNotEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareScalarNotEqual,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareScalarUnorderedNotEqual,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             CompareNotGreaterThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE,             CompareScalarNotGreaterThan,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareNotGreaterThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE,             CompareScalarNotGreaterThanOrEqual,         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareNotLessThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             CompareScalarNotLessThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareNotLessThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             CompareScalarNotLessThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareOrdered,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             CompareScalarOrdered,                       16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             CompareUnordered,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             CompareScalarUnordered,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             ConvertToInt32,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             ConvertScalarToVector128Single,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             ConvertToInt32WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             Divide,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             DivideScalar,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             LoadAlignedVector128,                       16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movaps,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             LoadHigh,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhps,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             LoadLow,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlps,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             LoadScalarVector128,                        16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             LoadVector128,                              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movups,             INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             Max,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE,             MaxScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             Min,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE,             MinScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             MoveHighToLow,                              16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhlps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE,             MoveLowToHigh,                              16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlhps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE,             MoveMask,                                   16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movmskps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE,             MoveScalar,                                 16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE,             Multiply,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE,             MultiplyScalar,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             Or,                                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_orps,               INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE,             Prefetch0,                                   0,              1,     {INS_invalid,           INS_prefetcht0,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             Prefetch1,                                   0,              1,     {INS_invalid,           INS_prefetcht1,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             Prefetch2,                                   0,              1,     {INS_invalid,           INS_prefetcht2,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             PrefetchNonTemporal,                         0,              1,     {INS_invalid,           INS_prefetchnta,        INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             Reciprocal,                                 16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rcpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             ReciprocalScalar,                           16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rcpss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             ReciprocalSqrt,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rsqrtps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             ReciprocalSqrtScalar,                       16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rsqrtss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             Shuffle,                                    16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_shufps,             INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE,             Sqrt,                                       16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtps,             INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             SqrtScalar,                                 16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             Store,                                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movups,             INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE,             StoreAligned,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movaps,             INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE,             StoreAlignedNonTemporal,                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movntps,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE,             StoreFence,                                  0,              0,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             StoreHigh,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhps,             INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE,             StoreLow,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlps,             INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE,             StoreScalar,                                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE,             Subtract,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             SubtractScalar,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subss,              INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE,             UnpackHigh,                                 16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpckhps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             UnpackLow,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpcklps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE,             Xor,                                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_xorps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE 64-bit-only Intrinsics
-HARDWARE_INTRINSIC(SSE_X64_ConvertToInt64,                          "ConvertToInt64",                               SSE_X64,      -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtss2si,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(SSE_X64_ConvertToInt64WithTruncation,            "ConvertToInt64WithTruncation",                 SSE_X64,      -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttss2si,      INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(SSE_X64_ConvertScalarToVector128Single,          "ConvertScalarToVector128Single",               SSE_X64,      -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2ss,       INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2si,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE_X64,         ConvertToInt64WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttss2si,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE_X64,         ConvertScalarToVector128Single,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2ss,           INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE2 Intrinsics
-HARDWARE_INTRINSIC(SSE2_Add,                                        "Add",                                          SSE2,         -1,              16,           2,     {INS_paddb,             INS_paddb,          INS_paddw,          INS_paddw,          INS_paddd,          INS_paddd,          INS_paddq,          INS_paddq,          INS_invalid,        INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_AddSaturate,                                "AddSaturate",                                  SSE2,         -1,              16,           2,     {INS_paddsb,            INS_paddusb,        INS_paddsw,         INS_paddusw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_AddScalar,                                  "AddScalar",                                    SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_And,                                        "And",                                          SSE2,         -1,              16,           2,     {INS_pand,              INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_invalid,        INS_andpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_AndNot,                                     "AndNot",                                       SSE2,         -1,              16,           2,     {INS_pandn,             INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_invalid,        INS_andnpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_Average,                                    "Average",                                      SSE2,         -1,              16,           2,     {INS_invalid,           INS_pavgb,          INS_invalid,        INS_pavgw,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareEqual,                               "CompareEqual",                                 SSE2, FloatComparisonMode::OrderedEqualNonSignaling,             16,           2,     {INS_pcmpeqb,           INS_pcmpeqb,        INS_pcmpeqw,        INS_pcmpeqw,        INS_pcmpeqd,        INS_pcmpeqd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedEqual,                  "CompareScalarOrderedEqual",                    SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareScalarEqual,                         "CompareScalarEqual",                           SSE2, FloatComparisonMode::OrderedEqualNonSignaling,             16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedEqual,                "CompareScalarUnorderedEqual",                  SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThan,                         "CompareGreaterThan",                           SSE2, FloatComparisonMode::OrderedLessThanSignaling,             16,           2,     {INS_pcmpgtb,           INS_invalid,        INS_pcmpgtw,        INS_invalid,        INS_pcmpgtd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedGreaterThan,            "CompareScalarOrderedGreaterThan",              SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareScalarGreaterThan,                   "CompareScalarGreaterThan",                     SSE2, FloatComparisonMode::OrderedLessThanSignaling,             16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedGreaterThan,          "CompareScalarUnorderedGreaterThan",            SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqual,                  "CompareGreaterThanOrEqual",                    SSE2, FloatComparisonMode::OrderedLessThanOrEqualSignaling,      16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedGreaterThanOrEqual,     "CompareScalarOrderedGreaterThanOrEqual",       SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareScalarGreaterThanOrEqual,            "CompareScalarGreaterThanOrEqual",              SSE2, FloatComparisonMode::OrderedLessThanOrEqualSignaling,      16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedGreaterThanOrEqual,   "CompareScalarUnorderedGreaterThanOrEqual",     SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareLessThan,                            "CompareLessThan",                              SSE2, FloatComparisonMode::OrderedLessThanSignaling,             16,           2,     {INS_pcmpgtb,           INS_invalid,        INS_pcmpgtw,        INS_invalid,        INS_pcmpgtd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_Special,                HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedLessThan,               "CompareScalarOrderedLessThan",                 SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareScalarLessThan,                      "CompareScalarLessThan",                        SSE2, FloatComparisonMode::OrderedLessThanSignaling,             16,           2,     {INS_pcmpgtb,           INS_invalid,        INS_pcmpgtw,        INS_invalid,        INS_pcmpgtd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedLessThan,             "CompareScalarUnorderedLessThan",               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqual,                     "CompareLessThanOrEqual",                       SSE2, FloatComparisonMode::OrderedLessThanOrEqualSignaling,      16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedLessThanOrEqual,        "CompareScalarOrderedLessThanOrEqual",          SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareScalarLessThanOrEqual,               "CompareScalarLessThanOrEqual",                 SSE2, FloatComparisonMode::OrderedLessThanOrEqualSignaling,      16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedLessThanOrEqual,      "CompareScalarUnorderedLessThanOrEqual",        SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqual,                            "CompareNotEqual",                              SSE2, FloatComparisonMode::UnorderedNotEqualNonSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareScalarOrderedNotEqual,               "CompareScalarOrderedNotEqual",                 SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareScalarNotEqual,                      "CompareScalarNotEqual",                        SSE2, FloatComparisonMode::UnorderedNotEqualNonSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareScalarUnorderedNotEqual,             "CompareScalarUnorderedNotEqual",               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThan,                      "CompareNotGreaterThan",                        SSE2, FloatComparisonMode::UnorderedNotLessThanSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(SSE2_CompareScalarNotGreaterThan,                "CompareScalarNotGreaterThan",                  SSE2, FloatComparisonMode::UnorderedNotLessThanSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanOrEqual,               "CompareNotGreaterThanOrEqual",                 SSE2, FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling, 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(SSE2_CompareScalarNotGreaterThanOrEqual,         "CompareScalarNotGreaterThanOrEqual",           SSE2, FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling, 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareNotLessThan,                         "CompareNotLessThan",                           SSE2, FloatComparisonMode::UnorderedNotLessThanSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareScalarNotLessThan,                   "CompareScalarNotLessThan",                     SSE2, FloatComparisonMode::UnorderedNotLessThanSignaling,        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareNotLessThanOrEqual,                  "CompareNotLessThanOrEqual",                    SSE2, FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling, 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareScalarNotLessThanOrEqual,            "CompareScalarNotLessThanOrEqual",              SSE2, FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling, 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareOrdered,                             "CompareOrdered",                               SSE2, FloatComparisonMode::OrderedNonSignaling,                  16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareScalarOrdered,                       "CompareScalarOrdered",                         SSE2, FloatComparisonMode::OrderedNonSignaling,                  16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareUnordered,                           "CompareUnordered",                             SSE2, FloatComparisonMode::UnorderedNonSignaling,                16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareScalarUnordered,                     "CompareScalarUnordered",                       SSE2, FloatComparisonMode::UnorderedNonSignaling,                16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt32,                             "ConvertToInt32",                               SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt32WithTruncation,               "ConvertToInt32WithTruncation",                 SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToUInt32,                            "ConvertToUInt32",                              SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Double,                   "ConvertToVector128Double",                     SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtdq2pd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtps2pd,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double,             "ConvertScalarToVector128Double",               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2sd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtss2sd,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32,                    "ConvertToVector128Int32",                      SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtps2dq,       INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int32,              "ConvertScalarToVector128Int32",                SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation,      "ConvertToVector128Int32WithTruncation",        SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttps2dq,      INS_cvttpd2dq},         HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single,                   "ConvertToVector128Single",                     SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtdq2ps,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single,             "ConvertScalarToVector128Single",               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsd2ss,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt32,             "ConvertScalarToVector128UInt32",               SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_Divide,                                     "Divide",                                       SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_DivideScalar,                               "DivideScalar",                                 SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_Extract,                                    "Extract",                                      SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pextrw,         INS_pextrw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_Insert,                                     "Insert",                                       SSE2,         -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_pinsrw,         INS_pinsrw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128,                       "LoadAlignedVector128",                         SSE2,         -1,              16,           1,     {INS_movdqa,            INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_invalid,        INS_movapd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_LoadFence,                                  "LoadFence",                                    SSE2,         -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_LoadHigh,                                   "LoadHigh",                                     SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movhpd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_LoadLow,                                    "LoadLow",                                      SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movlpd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_LoadScalarVector128,                        "LoadScalarVector128",                          SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_movd,           INS_movd,           INS_movq,           INS_movq,           INS_invalid,        INS_movsdsse2},         HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_LoadVector128,                              "LoadVector128",                                SSE2,         -1,              16,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_invalid,        INS_movupd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_MaskMove,                                   "MaskMove",                                     SSE2,         -1,              16,           3,     {INS_maskmovdqu,        INS_maskmovdqu,     INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_Max,                                        "Max",                                          SSE2,         -1,              16,           2,     {INS_invalid,           INS_pmaxub,         INS_pmaxsw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_maxpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MemoryFence,                                "MemoryFence",                                  SSE2,         -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_MaxScalar,                                  "MaxScalar",                                    SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_maxsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_Min,                                        "Min",                                          SSE2,         -1,              16,           2,     {INS_invalid,           INS_pminub,         INS_pminsw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_minpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MinScalar,                                  "MinScalar",                                    SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_minsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_MoveMask,                                   "MoveMask",                                     SSE2,         -1,              16,           1,     {INS_pmovmskb,          INS_pmovmskb,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movmskpd},          HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE2_MoveScalar,                                 "MoveScalar",                                   SSE2,         -1,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movq,           INS_movq,           INS_invalid,        INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(SSE2_Multiply,                                   "Multiply",                                     SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pmuludq,        INS_invalid,        INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MultiplyHigh,                               "MultiplyHigh",                                 SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pmulhw,         INS_pmulhuw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MultiplyAddAdjacent,                        "MultiplyAddAdjacent",                          SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pmaddwd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MultiplyLow,                                "MultiplyLow",                                  SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pmullw,         INS_pmullw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_MultiplyScalar,                             "MultiplyScalar",                               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mulsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_Or,                                         "Or",                                           SSE2,         -1,              16,           2,     {INS_por,               INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_invalid,        INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_PackSignedSaturate,                         "PackSignedSaturate",                           SSE2,         -1,              16,           2,     {INS_packsswb,          INS_invalid,        INS_packssdw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_PackUnsignedSaturate,                       "PackUnsignedSaturate",                         SSE2,         -1,              16,           2,     {INS_invalid,           INS_packuswb,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_SumAbsoluteDifferences,                     "SumAbsoluteDifferences",                       SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,         INS_invalid,       INS_psadbw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical,                           "ShiftLeftLogical",                             SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_psllw,          INS_psllw,          INS_pslld,          INS_pslld,          INS_psllq,          INS_psllq,          INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical128BitLane,                 "ShiftLeftLogical128BitLane",                   SSE2,         -1,              16,           2,     {INS_pslldq,            INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_ShiftRightArithmetic,                       "ShiftRightArithmetic",                         SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_psraw,          INS_invalid,        INS_psrad,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_ShiftRightLogical,                          "ShiftRightLogical",                            SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_psrlw,          INS_psrlw,          INS_psrld,          INS_psrld,          INS_psrlq,          INS_psrlq,          INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_ShiftRightLogical128BitLane,                "ShiftRightLogical128BitLane",                  SSE2,         -1,              16,           2,     {INS_psrldq,            INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_Shuffle,                                    "Shuffle",                                      SSE2,         -1,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pshufd,         INS_pshufd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_shufpd},            HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_ShuffleHigh,                                "ShuffleHigh",                                  SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pshufhw,        INS_pshufhw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_ShuffleLow,                                 "ShuffleLow",                                   SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pshuflw,        INS_pshuflw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE2_Sqrt,                                       "Sqrt",                                         SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sqrtpd},            HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_SqrtScalar,                                 "SqrtScalar",                                   SSE2,         -1,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sqrtsd},            HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_Store,                                      "Store",                                        SSE2,         -1,              16,           2,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_invalid,        INS_movupd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_StoreAligned,                               "StoreAligned",                                 SSE2,         -1,              16,           2,     {INS_movdqa,            INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_invalid,        INS_movapd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal,                    "StoreAlignedNonTemporal",                      SSE2,         -1,              16,           2,     {INS_movntdq,           INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_invalid,        INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_StoreHigh,                                  "StoreHigh",                                    SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movhpd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_StoreLow,                                   "StoreLow",                                     SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movlpd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_StoreNonTemporal,                           "StoreNonTemporal",                             SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_movnti,         INS_movnti,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_StoreScalar,                                "StoreScalar",                                  SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_movd,           INS_movd,           INS_movq,           INS_movq,           INS_invalid,        INS_movsdsse2},         HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_Subtract,                                   "Subtract",                                     SSE2,         -1,              16,           2,     {INS_psubb,             INS_psubb,          INS_psubw,          INS_psubw,          INS_psubd,          INS_psubd,          INS_psubq,          INS_psubq,          INS_invalid,        INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_SubtractSaturate,                           "SubtractSaturate",                             SSE2,         -1,              16,           2,     {INS_psubsb,            INS_psubusb,        INS_psubsw,         INS_psubusw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_SubtractScalar,                             "SubtractScalar",                               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_subsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_UnpackHigh,                                 "UnpackHigh",                                   SSE2,         -1,              16,           2,     {INS_punpckhbw,         INS_punpckhbw,      INS_punpckhwd,      INS_punpckhwd,      INS_punpckhdq,      INS_punpckhdq,      INS_punpckhqdq,     INS_punpckhqdq,     INS_invalid,        INS_unpckhpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_UnpackLow,                                  "UnpackLow",                                    SSE2,         -1,              16,           2,     {INS_punpcklbw,         INS_punpcklbw,      INS_punpcklwd,      INS_punpcklwd,      INS_punpckldq,      INS_punpckldq,      INS_punpcklqdq,     INS_punpcklqdq,     INS_invalid,        INS_unpcklpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_Xor,                                        "Xor",                                          SSE2,         -1,              16,           2,     {INS_pxor,              INS_pxor,           INS_pxor,           INS_pxor,           INS_pxor,           INS_pxor,           INS_pxor,           INS_pxor,           INS_invalid,        INS_xorpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-
-// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+HARDWARE_INTRINSIC(SSE2,            Add,                                        16,              2,     {INS_paddb,             INS_paddb,              INS_paddw,              INS_paddw,              INS_paddd,              INS_paddd,              INS_paddq,              INS_paddq,              INS_invalid,            INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            AddSaturate,                                16,              2,     {INS_paddsb,            INS_paddusb,            INS_paddsw,             INS_paddusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            AddScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            And,                                        16,              2,     {INS_pand,              INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_invalid,            INS_andpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            AndNot,                                     16,              2,     {INS_pandn,             INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_invalid,            INS_andnpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            Average,                                    16,              2,     {INS_invalid,           INS_pavgb,              INS_invalid,            INS_pavgw,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            CompareEqual,                               16,              2,     {INS_pcmpeqb,           INS_pcmpeqb,            INS_pcmpeqw,            INS_pcmpeqw,            INS_pcmpeqd,            INS_pcmpeqd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarEqual,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedEqual,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareGreaterThan,                         16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedGreaterThan,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarGreaterThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedGreaterThan,          16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareGreaterThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedGreaterThanOrEqual,     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarGreaterThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedGreaterThanOrEqual,   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareLessThan,                            16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_Special,                HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedLessThan,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarLessThan,                      16,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedLessThan,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareLessThanOrEqual,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedLessThanOrEqual,        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarLessThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedLessThanOrEqual,      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareNotEqual,                            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarOrderedNotEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarNotEqual,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarUnorderedNotEqual,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            CompareNotGreaterThan,                      16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarNotGreaterThan,                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareNotGreaterThanOrEqual,               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarNotGreaterThanOrEqual,         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareNotLessThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarNotLessThan,                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareNotLessThanOrEqual,                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarNotLessThanOrEqual,            16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareOrdered,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarOrdered,                       16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            CompareUnordered,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            CompareScalarUnordered,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            ConvertToInt32,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToInt32WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToUInt32,                            16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Double,                   16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Double,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2sd,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Int32,                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Int32,              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Int32WithTruncation,      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttps2dq,          INS_cvttpd2dq},         HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Single,                   16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Single,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2ss,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128UInt32,             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            Divide,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            DivideScalar,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            Extract,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_pextrw,             INS_pextrw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            Insert,                                     16,              3,     {INS_invalid,           INS_invalid,            INS_pinsrw,             INS_pinsrw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            LoadAlignedVector128,                       16,              1,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_invalid,            INS_movapd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            LoadFence,                                   0,              0,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            LoadHigh,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhpd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            LoadLow,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlpd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            LoadScalarVector128,                        16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_movd,               INS_movq,               INS_movq,               INS_invalid,            INS_movsdsse2},         HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            LoadVector128,                              16,              1,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_invalid,            INS_movupd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            MaskMove,                                   16,              3,     {INS_maskmovdqu,        INS_maskmovdqu,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            Max,                                        16,              2,     {INS_invalid,           INS_pmaxub,             INS_pmaxsw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            MemoryFence,                                 0,              0,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Special,                HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            MaxScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            Min,                                        16,              2,     {INS_invalid,           INS_pminub,             INS_pminsw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            MinScalar,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            MoveMask,                                   16,              1,     {INS_pmovmskb,          INS_pmovmskb,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movmskpd},          HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE2,            MoveScalar,                                 16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movq,               INS_movq,               INS_invalid,            INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE2,            Multiply,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pmuludq,            INS_invalid,            INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            MultiplyHigh,                               16,              2,     {INS_invalid,           INS_invalid,            INS_pmulhw,             INS_pmulhuw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            MultiplyAddAdjacent,                        16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pmaddwd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            MultiplyLow,                                16,              2,     {INS_invalid,           INS_invalid,            INS_pmullw,             INS_pmullw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            MultiplyScalar,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            Or,                                         16,              2,     {INS_por,               INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_invalid,            INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2,            PackSignedSaturate,                         16,              2,     {INS_packsswb,          INS_invalid,            INS_packssdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            PackUnsignedSaturate,                       16,              2,     {INS_invalid,           INS_packuswb,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            SumAbsoluteDifferences,                     16,              2,     {INS_invalid,           INS_invalid,             INS_invalid,           INS_psadbw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            ShiftLeftLogical,                           16,              2,     {INS_invalid,           INS_invalid,            INS_psllw,              INS_psllw,              INS_pslld,              INS_pslld,              INS_psllq,              INS_psllq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            ShiftLeftLogical128BitLane,                 16,              2,     {INS_pslldq,            INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            ShiftRightArithmetic,                       16,              2,     {INS_invalid,           INS_invalid,            INS_psraw,              INS_invalid,            INS_psrad,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            ShiftRightLogical,                          16,              2,     {INS_invalid,           INS_invalid,            INS_psrlw,              INS_psrlw,              INS_psrld,              INS_psrld,              INS_psrlq,              INS_psrlq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            ShiftRightLogical128BitLane,                16,              2,     {INS_psrldq,            INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            Shuffle,                                    16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pshufd,             INS_pshufd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_shufpd},            HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            ShuffleHigh,                                16,              2,     {INS_invalid,           INS_invalid,            INS_pshufhw,            INS_pshufhw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            ShuffleLow,                                 16,              2,     {INS_invalid,           INS_invalid,            INS_pshuflw,            INS_pshuflw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE2,            Sqrt,                                       16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtpd},            HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            SqrtScalar,                                 16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtsd},            HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            Store,                                      16,              2,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_invalid,            INS_movupd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            StoreAligned,                               16,              2,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_invalid,            INS_movapd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            StoreAlignedNonTemporal,                    16,              2,     {INS_movntdq,           INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_invalid,            INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            StoreHigh,                                  16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movhpd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            StoreLow,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movlpd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            StoreNonTemporal,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movnti,             INS_movnti,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            StoreScalar,                                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_movd,               INS_movq,               INS_movq,               INS_invalid,            INS_movsdsse2},         HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2,            Subtract,                                   16,              2,     {INS_psubb,             INS_psubb,              INS_psubw,              INS_psubw,              INS_psubd,              INS_psubd,              INS_psubq,              INS_psubq,              INS_invalid,            INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            SubtractSaturate,                           16,              2,     {INS_psubsb,            INS_psubusb,            INS_psubsw,             INS_psubusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            SubtractScalar,                             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE2,            UnpackHigh,                                 16,              2,     {INS_punpckhbw,         INS_punpckhbw,          INS_punpckhwd,          INS_punpckhwd,          INS_punpckhdq,          INS_punpckhdq,          INS_punpckhqdq,         INS_punpckhqdq,         INS_invalid,            INS_unpckhpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            UnpackLow,                                  16,              2,     {INS_punpcklbw,         INS_punpcklbw,          INS_punpcklwd,          INS_punpcklwd,          INS_punpckldq,          INS_punpckldq,          INS_punpcklqdq,         INS_punpcklqdq,         INS_invalid,            INS_unpcklpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2,            Xor,                                        16,              2,     {INS_pxor,              INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_invalid,            INS_xorpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE2 64-bit-only Intrinsics
-HARDWARE_INTRINSIC(SSE2_X64_ConvertToInt64,                         "ConvertToInt64",                               SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_X64_ConvertToInt64WithTruncation,           "ConvertToInt64WithTruncation",                 SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_X64_ConvertToUInt64,                        "ConvertToUInt64",                              SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_X64_ConvertScalarToVector128Double,         "ConvertScalarToVector128Double",               SSE2_X64,     -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2sd,       INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_X64_ConvertScalarToVector128Int64,          "ConvertScalarToVector128Int64",                SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(SSE2_X64_ConvertScalarToVector128UInt64,         "ConvertScalarToVector128UInt64",               SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(SSE2_X64_StoreNonTemporal,                       "StoreNonTemporal",                             SSE2_X64,     -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movnti,         INS_movnti,         INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertToUInt64,                            16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Double,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd,           INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Int64,              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128UInt64,             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_X64,        StoreNonTemporal,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movnti,             INS_movnti,             INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE3 Intrinsics
-HARDWARE_INTRINSIC(SSE3_AddSubtract,                                "AddSubtract",                                  SSE3,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addsubps,       INS_addsubpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE3_HorizontalAdd,                              "HorizontalAdd",                                SSE3,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_haddps,         INS_haddpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE3_HorizontalSubtract,                         "HorizontalSubtract",                           SSE3,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_hsubps,         INS_hsubpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE3_LoadAndDuplicateToVector128,                "LoadAndDuplicateToVector128",                  SSE3,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_lddqu,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movddup},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE3_LoadDquVector128,                           "LoadDquVector128",                             SSE3,         -1,              16,           1,     {INS_lddqu,             INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE3_MoveAndDuplicate,                           "MoveAndDuplicate",                             SSE3,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movddup},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE3_MoveHighAndDuplicate,                       "MoveHighAndDuplicate",                         SSE3,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movshdup,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate,                        "MoveLowAndDuplicate",                          SSE3,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movsldup,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3,            AddSubtract,                                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addsubps,           INS_addsubpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE3,            HorizontalAdd,                              16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_haddps,             INS_haddpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE3,            HorizontalSubtract,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_hsubps,             INS_hsubpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE3,            LoadAndDuplicateToVector128,                16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_lddqu,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movddup},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3,            LoadDquVector128,                           16,              1,     {INS_lddqu,             INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3,            MoveAndDuplicate,                           16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movddup},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3,            MoveHighAndDuplicate,                       16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movshdup,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3,            MoveLowAndDuplicate,                        16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movsldup,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSSE3 Intrinsics
-HARDWARE_INTRINSIC(SSSE3_Abs,                                       "Abs",                                          SSSE3,        -1,              16,           1,     {INS_invalid,           INS_pabsb,          INS_invalid,        INS_pabsw,          INS_invalid,        INS_pabsd,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSSE3_AlignRight,                                "AlignRight",                                   SSSE3,        -1,              16,           3,     {INS_palignr,           INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSSE3_HorizontalAdd,                             "HorizontalAdd",                                SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_phaddw,         INS_invalid,        INS_phaddd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSSE3_HorizontalAddSaturate,                     "HorizontalAddSaturate",                        SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_phaddsw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSSE3_HorizontalSubtract,                        "HorizontalSubtract",                           SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_phsubw,         INS_invalid,        INS_phsubd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSSE3_HorizontalSubtractSaturate,                "HorizontalSubtractSaturate",                   SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_phsubsw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSSE3_MultiplyAddAdjacent,                       "MultiplyAddAdjacent",                          SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pmaddubsw,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSSE3_MultiplyHighRoundScale,                    "MultiplyHighRoundScale",                       SSSE3,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pmulhrsw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSSE3_Shuffle,                                   "Shuffle",                                      SSSE3,        -1,              16,           2,     {INS_pshufb,            INS_pshufb,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSSE3_Sign,                                      "Sign",                                         SSSE3,        -1,              16,           2,     {INS_psignb,            INS_invalid,        INS_psignw,         INS_invalid,        INS_psignd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3,           Abs,                                        16,              1,     {INS_invalid,           INS_pabsb,              INS_invalid,            INS_pabsw,              INS_invalid,            INS_pabsd,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSSE3,           AlignRight,                                 16,              3,     {INS_palignr,           INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSSE3,           HorizontalAdd,                              16,              2,     {INS_invalid,           INS_invalid,            INS_phaddw,             INS_invalid,            INS_phaddd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3,           HorizontalAddSaturate,                      16,              2,     {INS_invalid,           INS_invalid,            INS_phaddsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3,           HorizontalSubtract,                         16,              2,     {INS_invalid,           INS_invalid,            INS_phsubw,             INS_invalid,            INS_phsubd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3,           HorizontalSubtractSaturate,                 16,              2,     {INS_invalid,           INS_invalid,            INS_phsubsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3,           MultiplyAddAdjacent,                        16,              2,     {INS_invalid,           INS_invalid,            INS_pmaddubsw,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3,           MultiplyHighRoundScale,                     16,              2,     {INS_invalid,           INS_invalid,            INS_pmulhrsw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3,           Shuffle,                                    16,              2,     {INS_pshufb,            INS_pshufb,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSSE3,           Sign,                                       16,              2,     {INS_psignb,            INS_invalid,            INS_psignw,             INS_invalid,            INS_psignd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE41 Intrinsics
-HARDWARE_INTRINSIC(SSE41_Blend,                                     "Blend",                                        SSE41,        -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_pblendw,        INS_pblendw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blendps,        INS_blendpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE41_BlendVariable,                             "BlendVariable",                                SSE41,        -1,              16,           3,     {INS_pblendvb,          INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_pblendvb,       INS_blendvps,       INS_blendvpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE41_Ceiling,                                   "Ceiling",                                      SSE41,        10,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_CeilingScalar,                             "CeilingScalar",                                SSE41,        10,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_CompareEqual,                              "CompareEqual",                                 SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pcmpeqq,        INS_pcmpeqq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16,                   "ConvertToVector128Int16",                      SSE41,        -1,              16,           1,     {INS_pmovsxbw,          INS_pmovzxbw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32,                   "ConvertToVector128Int32",                      SSE41,        -1,              16,           1,     {INS_pmovsxbd,          INS_pmovzxbd,       INS_pmovsxwd,       INS_pmovzxwd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64,                   "ConvertToVector128Int64",                      SSE41,        -1,              16,           1,     {INS_pmovsxbq,          INS_pmovzxbq,       INS_pmovsxwq,       INS_pmovzxwq,       INS_pmovsxdq,       INS_pmovzxdq,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_DotProduct,                                "DotProduct",                                   SSE41,        -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_dpps,           INS_dppd},              HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE41_Extract,                                   "Extract",                                      SSE41,        -1,              16,           2,     {INS_pextrb,            INS_pextrb,         INS_invalid,        INS_invalid,        INS_pextrd,         INS_pextrd,         INS_invalid,        INS_invalid,        INS_extractps,      INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_Floor,                                     "Floor",                                        SSE41,         9,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_FloorScalar,                               "FloorScalar",                                  SSE41,         9,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_Insert,                                    "Insert",                                       SSE41,        -1,              16,           3,     {INS_pinsrb,            INS_pinsrb,         INS_invalid,        INS_invalid,        INS_pinsrd,         INS_pinsrd,         INS_invalid,        INS_invalid,        INS_insertps,       INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE41_LoadAlignedVector128NonTemporal,           "LoadAlignedVector128NonTemporal",              SSE41,        -1,              16,           1,     {INS_movntdqa,          INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_Max,                                       "Max",                                          SSE41,        -1,              16,           2,     {INS_pmaxsb,            INS_invalid,        INS_invalid,        INS_pmaxuw,         INS_pmaxsd,         INS_pmaxud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE41_Min,                                       "Min",                                          SSE41,        -1,              16,           2,     {INS_pminsb,            INS_invalid,        INS_invalid,        INS_pminuw,         INS_pminsd,         INS_pminud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE41_MinHorizontal,                             "MinHorizontal",                                SSE41,        -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_phminposuw,     INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_MultipleSumAbsoluteDifferences,            "MultipleSumAbsoluteDifferences",               SSE41,        -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_mpsadbw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE41_Multiply,                                  "Multiply",                                     SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pmuldq,         INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE41_MultiplyLow,                               "MultiplyLow",                                  SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pmulld,         INS_pmulld,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE41_PackUnsignedSaturate,                      "PackUnsignedSaturate",                         SSE41,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_packusdw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE41_RoundCurrentDirection,                     "RoundCurrentDirection",                        SSE41,         4,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_RoundCurrentDirectionScalar,               "RoundCurrentDirectionScalar",                  SSE41,         4,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_RoundToNearestInteger,                     "RoundToNearestInteger",                        SSE41,         8,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_RoundToNearestIntegerScalar,               "RoundToNearestIntegerScalar",                  SSE41,         8,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_RoundToNegativeInfinity,                   "RoundToNegativeInfinity",                      SSE41,         9,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_RoundToNegativeInfinityScalar,             "RoundToNegativeInfinityScalar",                SSE41,         9,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinity,                   "RoundToPositiveInfinity",                      SSE41,        10,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinityScalar,             "RoundToPositiveInfinityScalar",                SSE41,        10,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_RoundToZero,                               "RoundToZero",                                  SSE41,        11,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_RoundToZeroScalar,                         "RoundToZeroScalar",                            SSE41,        11,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_TestC,                                     "TestC",                                        SSE41,        -1,              16,           2,     {INS_ptest,             INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC,                           "TestNotZAndNotC",                              SSE41,        -1,              16,           2,     {INS_ptest,             INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE41_TestZ,                                     "TestZ",                                        SSE41,        -1,              16,           2,     {INS_ptest,             INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-
-// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+HARDWARE_INTRINSIC(SSE41,           Blend,                                      16,              3,     {INS_invalid,           INS_invalid,            INS_pblendw,            INS_pblendw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blendps,            INS_blendpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE41,           BlendVariable,                              16,              3,     {INS_pblendvb,          INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_pblendvb,           INS_blendvps,           INS_blendvpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41,           Ceiling,                                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           CeilingScalar,                              16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE41,           CompareEqual,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pcmpeqq,            INS_pcmpeqq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE41,           ConvertToVector128Int16,                    16,              1,     {INS_pmovsxbw,          INS_pmovzxbw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           ConvertToVector128Int32,                    16,              1,     {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           ConvertToVector128Int64,                    16,              1,     {INS_pmovsxbq,          INS_pmovzxbq,           INS_pmovsxwq,           INS_pmovzxwq,           INS_pmovsxdq,           INS_pmovzxdq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           DotProduct,                                 16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_dpps,               INS_dppd},              HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE41,           Extract,                                    16,              2,     {INS_pextrb,            INS_pextrb,             INS_invalid,            INS_invalid,            INS_pextrd,             INS_pextrd,             INS_invalid,            INS_invalid,            INS_extractps,          INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           Floor,                                      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           FloorScalar,                                16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE41,           Insert,                                     16,              3,     {INS_pinsrb,            INS_pinsrb,             INS_invalid,            INS_invalid,            INS_pinsrd,             INS_pinsrd,             INS_invalid,            INS_invalid,            INS_insertps,           INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE41,           LoadAlignedVector128NonTemporal,            16,              1,     {INS_movntdqa,          INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           Max,                                        16,              2,     {INS_pmaxsb,            INS_invalid,            INS_invalid,            INS_pmaxuw,             INS_pmaxsd,             INS_pmaxud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41,           Min,                                        16,              2,     {INS_pminsb,            INS_invalid,            INS_invalid,            INS_pminuw,             INS_pminsd,             INS_pminud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41,           MinHorizontal,                              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_phminposuw,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           MultipleSumAbsoluteDifferences,             16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_mpsadbw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE41,           Multiply,                                   16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pmuldq,             INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE41,           MultiplyLow,                                16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pmulld,             INS_pmulld,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41,           PackUnsignedSaturate,                       16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_packusdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41,           RoundCurrentDirection,                      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           RoundCurrentDirectionScalar,                16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE41,           RoundToNearestInteger,                      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           RoundToNearestIntegerScalar,                16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE41,           RoundToNegativeInfinity,                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           RoundToNegativeInfinityScalar,              16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE41,           RoundToPositiveInfinity,                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           RoundToPositiveInfinityScalar,              16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE41,           RoundToZero,                                16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           RoundToZeroScalar,                          16,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundss,            INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE41,           TestC,                                      16,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41,           TestNotZAndNotC,                            16,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41,           TestZ,                                      16,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE41 64-bit-only Intrinsics
-HARDWARE_INTRINSIC(SSE41_X64_Extract,                               "Extract",                                      SSE41_X64,    -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pextrq,         INS_pextrq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_X64_Insert,                                "Insert",                                       SSE41_X64,    -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pinsrq,         INS_pinsrq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(SSE41_X64,       Extract,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pextrq,             INS_pextrq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_X64,       Insert,                                     16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pinsrq,             INS_pinsrq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE42 Intrinsics
-HARDWARE_INTRINSIC(SSE42_Crc32,                                     "Crc32",                                        SSE42,        -1,               0,           2,     {INS_invalid,           INS_crc32,          INS_invalid,        INS_crc32,          INS_invalid,        INS_crc32,          INS_invalid,        INS_invalid,         INS_invalid,        INS_invalid},          HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
-HARDWARE_INTRINSIC(SSE42_CompareGreaterThan,                        "CompareGreaterThan",                           SSE42,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pcmpgtq,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE42,           Crc32,                                       0,              2,     {INS_invalid,           INS_crc32,              INS_invalid,            INS_crc32,              INS_invalid,            INS_crc32,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
+HARDWARE_INTRINSIC(SSE42,           CompareGreaterThan,                         16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE42 Intrinsics
-HARDWARE_INTRINSIC(SSE42_X64_Crc32,                                 "Crc32",                                        SSE42_X64,    -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_crc32,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
+HARDWARE_INTRINSIC(SSE42_X64,       Crc32,                                       0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_crc32,              INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  AVX Intrinsics
-HARDWARE_INTRINSIC(AVX_Add,                                         "Add",                                          AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addps,          INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_AddSubtract,                                 "AddSubtract",                                  AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addsubps,       INS_addsubpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_And,                                         "And",                                          AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_andps,          INS_andpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_AndNot,                                      "AndNot",                                       AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_andnps,         INS_andnpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_Blend,                                       "Blend",                                        AVX,          -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blendps,        INS_blendpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX_BlendVariable,                               "BlendVariable",                                AVX,          -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vblendvps,      INS_vblendvpd},         HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_Ceiling,                                     "Ceiling",                                      AVX,          10,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_BroadcastScalarToVector128,                  "BroadcastScalarToVector128",                   AVX,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vbroadcastss,   INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_BroadcastScalarToVector256,                  "BroadcastScalarToVector256",                   AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vbroadcastss,   INS_vbroadcastsd},      HW_Category_MemoryLoad,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_BroadcastVector128ToVector256,               "BroadcastVector128ToVector256",                AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vbroadcastf128, INS_vbroadcastf128},    HW_Category_MemoryLoad,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_Compare,                                     "Compare",                                      AVX,          -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_cmppd},             HW_Category_IMM,                    HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_CompareScalar,                               "CompareScalar",                                AVX,          -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_cmpsd},             HW_Category_IMM,                    HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX_ConvertToVector128Int32,                     "ConvertToVector128Int32",                      AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtpd2dq,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ConvertToVector128Single,                    "ConvertToVector128Single",                     AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtpd2ps,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ConvertToVector256Int32,                     "ConvertToVector256Int32",                      AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtps2dq,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ConvertToVector256Single,                    "ConvertToVector256Single",                     AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtdq2ps,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ConvertToVector256Double,                    "ConvertToVector256Double",                     AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtdq2pd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtps2pd,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ConvertToVector128Int32WithTruncation,       "ConvertToVector128Int32WithTruncation",        AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttpd2dq,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ConvertToVector256Int32WithTruncation,       "ConvertToVector256Int32WithTruncation",        AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttps2dq,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_Divide,                                      "Divide",                                       AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divps,          INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_DotProduct,                                  "DotProduct",                                   AVX,          -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_dpps,           INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX_DuplicateEvenIndexed,                        "DuplicateEvenIndexed",                         AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movsldup,       INS_movddup},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_DuplicateOddIndexed,                         "DuplicateOddIndexed",                          AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movshdup,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ExtractVector128,                            "ExtractVector128",                             AVX,          -1,              32,           2,     {INS_vextractf128,      INS_vextractf128,   INS_vextractf128,   INS_vextractf128,   INS_vextractf128,   INS_vextractf128,   INS_vextractf128,   INS_vextractf128,   INS_vextractf128,   INS_vextractf128},      HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX_Floor,                                       "Floor",                                        AVX,           9,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_HorizontalAdd,                               "HorizontalAdd",                                AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_haddps,         INS_haddpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_HorizontalSubtract,                          "HorizontalSubtract",                           AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_hsubps,         INS_hsubpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_InsertVector128,                             "InsertVector128",                              AVX,          -1,              32,           3,     {INS_vinsertf128,       INS_vinsertf128,    INS_vinsertf128,    INS_vinsertf128,    INS_vinsertf128,    INS_vinsertf128,    INS_vinsertf128,    INS_vinsertf128,    INS_vinsertf128,    INS_vinsertf128},       HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX_LoadAlignedVector256,                        "LoadAlignedVector256",                         AVX,          -1,              32,           1,     {INS_movdqa,            INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movaps,         INS_movapd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_LoadDquVector256,                            "LoadDquVector256",                             AVX,          -1,              32,           1,     {INS_lddqu,             INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_lddqu,          INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_LoadVector256,                               "LoadVector256",                                AVX,          -1,              32,           1,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_Max,                                         "Max",                                          AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_maxps,          INS_maxpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_Min,                                         "Min",                                          AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_minps,          INS_minpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_MaskLoad,                                    "MaskLoad",                                     AVX,          -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vmaskmovps,     INS_vmaskmovpd},        HW_Category_MemoryLoad,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX_MaskStore,                                   "MaskStore",                                    AVX,          -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vmaskmovps,     INS_vmaskmovpd},        HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(AVX_MoveMask,                                    "MoveMask",                                     AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movmskps,       INS_movmskpd},          HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX_Multiply,                                    "Multiply",                                     AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mulps,          INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_Or,                                          "Or",                                           AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_orps,           INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX_Permute,                                     "Permute",                                      AVX,          -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vpermilps,      INS_vpermilpd},         HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX_Permute2x128,                                "Permute2x128",                                 AVX,          -1,              32,           3,     {INS_vperm2f128,        INS_vperm2f128,     INS_vperm2f128,     INS_vperm2f128,     INS_vperm2f128,     INS_vperm2f128,     INS_vperm2f128,     INS_vperm2f128,     INS_vperm2f128,     INS_vperm2f128},        HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX_PermuteVar,                                  "PermuteVar",                                   AVX,          -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vpermilpsvar,   INS_vpermilpdvar},      HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX_Reciprocal,                                  "Reciprocal",                                   AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_rcpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_ReciprocalSqrt,                              "ReciprocalSqrt",                               AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_rsqrtps,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_RoundCurrentDirection,                       "RoundCurrentDirection",                        AVX,           4,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_RoundToNearestInteger,                       "RoundToNearestInteger",                        AVX,           8,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_RoundToNegativeInfinity,                     "RoundToNegativeInfinity",                      AVX,           9,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_RoundToPositiveInfinity,                     "RoundToPositiveInfinity",                      AVX,          10,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_RoundToZero,                                 "RoundToZero",                                  AVX,          11,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_Shuffle,                                     "Shuffle",                                      AVX,          -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_shufps,         INS_shufpd},            HW_Category_IMM,                    HW_Flag_NoRMWSemantics|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX_Sqrt,                                        "Sqrt",                                         AVX,          -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sqrtps,         INS_sqrtpd},            HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_Store,                                       "Store",                                        AVX,          -1,              32,           2,     {INS_movdqu,            INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movdqu,         INS_movups,         INS_movupd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(AVX_StoreAligned,                                "StoreAligned",                                 AVX,          -1,              32,           2,     {INS_movdqa,            INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movdqa,         INS_movaps,         INS_movapd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(AVX_StoreAlignedNonTemporal,                     "StoreAlignedNonTemporal",                      AVX,          -1,              32,           2,     {INS_movntdq,           INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntps,        INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(AVX_Subtract,                                    "Subtract",                                     AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_subps,          INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_TestC,                                       "TestC",                                        AVX,          -1,               0,           2,     {INS_ptest,             INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_vtestps,        INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX_TestNotZAndNotC,                             "TestNotZAndNotC",                              AVX,          -1,               0,           2,     {INS_ptest,             INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_vtestps,        INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX_TestZ,                                       "TestZ",                                        AVX,          -1,               0,           2,     {INS_ptest,             INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_vtestps,        INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX_UnpackHigh,                                  "UnpackHigh",                                   AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_unpckhps,       INS_unpckhpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_UnpackLow,                                   "UnpackLow",                                    AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_unpcklps,       INS_unpcklpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_Xor,                                         "Xor",                                          AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_xorps,          INS_xorpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-
-// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+HARDWARE_INTRINSIC(AVX,             Add,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addps,              INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX,             AddSubtract,                                32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_addsubps,           INS_addsubpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             And,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andps,              INS_andpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX,             AndNot,                                     32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andnps,             INS_andnpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             Blend,                                      32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blendps,            INS_blendpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX,             BlendVariable,                              32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vblendvps,          INS_vblendvpd},         HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             Ceiling,                                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             BroadcastScalarToVector128,                 16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcastss,       INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             BroadcastScalarToVector256,                 32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcastss,       INS_vbroadcastsd},      HW_Category_MemoryLoad,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             BroadcastVector128ToVector256,              32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vbroadcastf128,     INS_vbroadcastf128},    HW_Category_MemoryLoad,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             Compare,                                    32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_IMM,                    HW_Flag_NoFlag)
+
+HARDWARE_INTRINSIC(AVX,             CompareEqual,                               32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX,             CompareGreaterThan,                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareGreaterThanOrEqual,                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareLessThan,                            32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareLessThanOrEqual,                     32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareNotEqual,                            32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX,             CompareNotGreaterThan,                      32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareNotGreaterThanOrEqual,               32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareNotLessThan,                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareNotLessThanOrEqual,                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareOrdered,                             32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             CompareUnordered,                           32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpps,              INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+
+HARDWARE_INTRINSIC(AVX,             CompareScalar,                              16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpss,              INS_cmpsd},             HW_Category_IMM,                    HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector128Single,                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector256Int32,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector256Single,                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector256Double,                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector128Int32WithTruncation,      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttpd2dq,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             ConvertToVector256Int32WithTruncation,      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttps2dq,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             Divide,                                     32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divps,              INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             DotProduct,                                 32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_dpps,               INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX,             DuplicateEvenIndexed,                       32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movsldup,           INS_movddup},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             DuplicateOddIndexed,                        32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movshdup,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             ExtractVector128,                           32,              2,     {INS_vextractf128,      INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128,       INS_vextractf128},      HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX,             Floor,                                      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             HorizontalAdd,                              32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_haddps,             INS_haddpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             HorizontalSubtract,                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_hsubps,             INS_hsubpd},            HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             InsertVector128,                            32,              3,     {INS_vinsertf128,       INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128,        INS_vinsertf128},       HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX,             LoadAlignedVector256,                       32,              1,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movaps,             INS_movapd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             LoadDquVector256,                           32,              1,     {INS_lddqu,             INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_lddqu,              INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             LoadVector256,                              32,              1,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             Max,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxps,              INS_maxpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX,             Min,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minps,              INS_minpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX,             MaskLoad,                                    0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vmaskmovps,         INS_vmaskmovpd},        HW_Category_MemoryLoad,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX,             MaskStore,                                   0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vmaskmovps,         INS_vmaskmovpd},        HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVX,             MoveMask,                                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movmskps,           INS_movmskpd},          HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX,             Multiply,                                   32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulps,              INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX,             Or,                                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_orps,               INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX,             Permute,                                     0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilps,          INS_vpermilpd},         HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX,             Permute2x128,                               32,              3,     {INS_vperm2f128,        INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128},        HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX,             PermuteVar,                                  0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilpsvar,       INS_vpermilpdvar},      HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX,             Reciprocal,                                 32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rcpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             ReciprocalSqrt,                             32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rsqrtps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             RoundCurrentDirection,                      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             RoundToNearestInteger,                      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             RoundToNegativeInfinity,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             RoundToPositiveInfinity,                    32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             RoundToZero,                                32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             Shuffle,                                    32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_shufps,             INS_shufpd},            HW_Category_IMM,                    HW_Flag_NoRMWSemantics|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX,             Sqrt,                                       32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_sqrtps,             INS_sqrtpd},            HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             Store,                                      32,              2,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVX,             StoreAligned,                               32,              2,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movaps,             INS_movapd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVX,             StoreAlignedNonTemporal,                    32,              2,     {INS_movntdq,           INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntps,            INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVX,             Subtract,                                   32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subps,              INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             TestC,                                       0,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX,             TestNotZAndNotC,                             0,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX,             TestZ,                                       0,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX,             UnpackHigh,                                 32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpckhps,           INS_unpckhpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             UnpackLow,                                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpcklps,           INS_unpcklpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             Xor,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_xorps,              INS_xorpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  AVX2 Intrinsics
-HARDWARE_INTRINSIC(AVX2_Abs,                                        "Abs",                                          AVX2,         -1,              32,           1,     {INS_pabsb,             INS_pabsb,          INS_pabsw,          INS_pabsw,          INS_pabsd,          INS_pabsd,          INS_paddq,          INS_paddq,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX2_Add,                                        "Add",                                          AVX2,         -1,              32,           2,     {INS_paddb,             INS_paddb,          INS_paddw,          INS_paddw,          INS_paddd,          INS_paddd,          INS_paddq,          INS_paddq,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_AddSaturate,                                "AddSaturate",                                  AVX2,         -1,              32,           2,     {INS_paddsb,            INS_paddusb,        INS_paddsw,         INS_paddusw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_AlignRight,                                 "AlignRight",                                   AVX2,         -1,              32,           3,     {INS_palignr,           INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_palignr,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_And,                                        "And",                                          AVX2,         -1,              32,           2,     {INS_pand,              INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_AndNot,                                     "AndNot",                                       AVX2,         -1,              32,           2,     {INS_pandn,             INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_Average,                                    "Average",                                      AVX2,         -1,              32,           2,     {INS_invalid,           INS_pavgb,          INS_invalid,        INS_pavgw,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_Blend,                                      "Blend",                                        AVX2,         -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_pblendw,        INS_pblendw,        INS_vpblendd,       INS_vpblendd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_UnfixedSIMDSize|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_BlendVariable,                              "BlendVariable",                                AVX2,         -1,              32,           3,     {INS_vpblendvb,         INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_vpblendvb,      INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector128,                 "BroadcastScalarToVector128",                   AVX2,         -1,              16,           1,     {INS_vpbroadcastb,      INS_vpbroadcastb,   INS_vpbroadcastw,   INS_vpbroadcastw,   INS_vpbroadcastd,   INS_vpbroadcastd,   INS_vpbroadcastq,   INS_vpbroadcastq,   INS_vbroadcastss,   INS_movddup},           HW_Category_SIMDScalar,             HW_Flag_MaybeMemoryLoad)
-HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector256,                 "BroadcastScalarToVector256",                   AVX2,         -1,              32,           1,     {INS_vpbroadcastb,      INS_vpbroadcastb,   INS_vpbroadcastw,   INS_vpbroadcastw,   INS_vpbroadcastd,   INS_vpbroadcastd,   INS_vpbroadcastq,   INS_vpbroadcastq,   INS_vbroadcastss,   INS_vbroadcastsd},      HW_Category_SIMDScalar,             HW_Flag_MaybeMemoryLoad)
-HARDWARE_INTRINSIC(AVX2_BroadcastVector128ToVector256,              "BroadcastVector128ToVector256",                AVX2,         -1,              32,           1,     {INS_vbroadcasti128,    INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_CompareEqual,                               "CompareEqual",                                 AVX2,         -1,              32,           2,     {INS_pcmpeqb,           INS_pcmpeqb,        INS_pcmpeqw,        INS_pcmpeqw,        INS_pcmpeqd,        INS_pcmpeqd,        INS_pcmpeqq,        INS_pcmpeqq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_CompareGreaterThan,                         "CompareGreaterThan",                           AVX2,         -1,              32,           2,     {INS_pcmpgtb,           INS_invalid,        INS_pcmpgtw,        INS_invalid,        INS_pcmpgtd,        INS_invalid,        INS_pcmpgtq,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_ExtractVector128,                           "ExtractVector128",                             AVX2,         -1,              32,           2,     {INS_vextracti128,      INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ConvertToInt32,                             "ConvertToInt32",                               AVX2,         -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX2_ConvertToUInt32,                            "ConvertToUInt32",                              AVX2,         -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int16,                    "ConvertToVector256Int16",                      AVX2,         -1,              32,           1,     {INS_pmovsxbw,          INS_pmovzxbw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32,                    "ConvertToVector256Int32",                      AVX2,         -1,              32,           1,     {INS_pmovsxbd,          INS_pmovzxbd,       INS_pmovsxwd,       INS_pmovzxwd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64,                    "ConvertToVector256Int64",                      AVX2,         -1,              32,           1,     {INS_pmovsxbq,          INS_pmovzxbq,       INS_pmovsxwq,       INS_pmovzxwq,       INS_pmovsxdq,       INS_pmovzxdq,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX2_GatherVector128,                            "GatherVector128",                              AVX2,         -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpgatherdd,     INS_vpgatherdd,     INS_vpgatherdq,     INS_vpgatherdq,     INS_vgatherdps,     INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(AVX2_GatherVector256,                            "GatherVector256",                              AVX2,         -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpgatherdd,     INS_vpgatherdd,     INS_vpgatherdq,     INS_vpgatherdq,     INS_vgatherdps,     INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(AVX2_GatherMaskVector128,                        "GatherMaskVector128",                          AVX2,         -1,              16,           5,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpgatherdd,     INS_vpgatherdd,     INS_vpgatherdq,     INS_vpgatherdq,     INS_vgatherdps,     INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(AVX2_GatherMaskVector256,                        "GatherMaskVector256",                          AVX2,         -1,              32,           5,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpgatherdd,     INS_vpgatherdd,     INS_vpgatherdq,     INS_vpgatherdq,     INS_vgatherdps,     INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(AVX2_HorizontalAdd,                              "HorizontalAdd",                                AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_phaddw,         INS_invalid,        INS_phaddd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate,                      "HorizontalAddSaturate",                        AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_phaddsw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_HorizontalSubtract,                         "HorizontalSubtract",                           AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_phsubw,         INS_invalid,        INS_phsubd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_HorizontalSubtractSaturate,                 "HorizontalSubtractSaturate",                   AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_phsubsw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_InsertVector128,                            "InsertVector128",                              AVX2,         -1,              32,           3,     {INS_vinserti128,       INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal,            "LoadAlignedVector256NonTemporal",              AVX2,         -1,              32,           1,     {INS_movntdqa,          INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_MaskLoad,                                   "MaskLoad",                                     AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpmaskmovd,     INS_vpmaskmovd,     INS_vpmaskmovq,     INS_vpmaskmovq,     INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX2_MaskStore,                                  "MaskStore",                                    AVX2,         -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpmaskmovd,     INS_vpmaskmovd,     INS_vpmaskmovq,     INS_vpmaskmovq,     INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(AVX2_Max,                                        "Max",                                          AVX2,         -1,              32,           2,     {INS_pmaxsb,            INS_pmaxub,         INS_pmaxsw,         INS_pmaxuw,         INS_pmaxsd,         INS_pmaxud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_Min,                                        "Min",                                          AVX2,         -1,              32,           2,     {INS_pminsb,            INS_pminub,         INS_pminsw,         INS_pminuw,         INS_pminsd,         INS_pminud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_MoveMask,                                   "MoveMask",                                     AVX2,         -1,              32,           1,     {INS_pmovmskb,          INS_pmovmskb,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX2_Multiply,                                   "Multiply",                                     AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pmuldq,         INS_pmuludq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_MultipleSumAbsoluteDifferences,             "MultipleSumAbsoluteDifferences",               AVX2,         -1,              32,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_mpsadbw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_MultiplyAddAdjacent,                        "MultiplyAddAdjacent",                          AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pmaddubsw,      INS_invalid,        INS_pmaddwd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_MultiplyHigh,                               "MultiplyHigh",                                 AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pmulhw,         INS_pmulhuw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_MultiplyHighRoundScale,                     "MultiplyHighRoundScale",                       AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pmulhrsw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_MultiplyLow,                                "MultiplyLow",                                  AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pmullw,         INS_pmullw,         INS_pmulld,         INS_pmulld,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_Or,                                         "Or",                                           AVX2,         -1,              32,           2,     {INS_por,               INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_por,            INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2_Permute2x128,                               "Permute2x128",                                 AVX2,         -1,              32,           3,     {INS_vperm2i128,        INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_vperm2i128,     INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_Permute4x64,                                "Permute4x64",                                  AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vpermq,         INS_vpermq,         INS_invalid,        INS_vpermpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_PermuteVar8x32,                             "PermuteVar8x32",                               AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpermd,         INS_vpermd,         INS_invalid,        INS_invalid,        INS_vpermps,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(AVX2_PackSignedSaturate,                         "PackSignedSaturate",                           AVX2,         -1,              32,           2,     {INS_packsswb,          INS_invalid,        INS_packssdw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_PackUnsignedSaturate,                       "PackUnsignedSaturate",                         AVX2,         -1,              32,           2,     {INS_invalid,           INS_packuswb,       INS_invalid,        INS_packusdw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical,                           "ShiftLeftLogical",                             AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_psllw,          INS_psllw,          INS_pslld,          INS_pslld,          INS_psllq,          INS_psllq,          INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical128BitLane,                 "ShiftLeftLogical128BitLane",                   AVX2,         -1,              32,           2,     {INS_pslldq,            INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_pslldq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ShiftLeftLogicalVariable,                   "ShiftLeftLogicalVariable",                     AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpsllvd,        INS_vpsllvd,        INS_vpsllvq,        INS_vpsllvq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX2_ShiftRightArithmetic,                       "ShiftRightArithmetic",                         AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_psraw,          INS_invalid,        INS_psrad,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ShiftRightArithmeticVariable,               "ShiftRightArithmeticVariable",                 AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpsravd,        INS_vpsravd,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX2_ShiftRightLogical,                          "ShiftRightLogical",                            AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_psrlw,          INS_psrlw,          INS_psrld,          INS_psrld,          INS_psrlq,          INS_psrlq,          INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ShiftRightLogical128BitLane,                "ShiftRightLogical128BitLane",                  AVX2,         -1,              32,           2,     {INS_psrldq,            INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_psrldq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ShiftRightLogicalVariable,                  "ShiftRightLogicalVariable",                    AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpsrlvd,        INS_vpsrlvd,        INS_vpsrlvq,        INS_vpsrlvq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX2_Shuffle,                                    "Shuffle",                                      AVX2,         -1,              32,           2,     {INS_pshufb,            INS_pshufb,         INS_invalid,        INS_invalid,        INS_pshufd,         INS_pshufd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM)
-HARDWARE_INTRINSIC(AVX2_ShuffleHigh,                                "ShuffleHigh",                                  AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pshufhw,        INS_pshufhw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ShuffleLow,                                 "ShuffleLow",                                   AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_pshuflw,        INS_pshuflw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_Sign,                                       "Sign",                                         AVX2,         -1,              32,           2,     {INS_psignb,            INS_invalid,        INS_psignw,         INS_invalid,        INS_psignd,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_SumAbsoluteDifferences,                     "SumAbsoluteDifferences",                       AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_psadbw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_Subtract,                                   "Subtract",                                     AVX2,         -1,              32,           2,     {INS_psubb,             INS_psubb,          INS_psubw,          INS_psubw,          INS_psubd,          INS_psubd,          INS_psubq,          INS_psubq,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_SubtractSaturate,                           "SubtractSaturate",                             AVX2,         -1,              32,           2,     {INS_psubsb,            INS_psubusb,        INS_psubsw,         INS_psubusw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_UnpackHigh,                                 "UnpackHigh",                                   AVX2,         -1,              32,           2,     {INS_punpckhbw,         INS_punpckhbw,      INS_punpckhwd,      INS_punpckhwd,      INS_punpckhdq,      INS_punpckhdq,      INS_punpckhqdq,     INS_punpckhqdq,     INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_UnpackLow,                                  "UnpackLow",                                    AVX2,         -1,              32,           2,     {INS_punpcklbw,         INS_punpcklbw,      INS_punpcklwd,      INS_punpcklwd,      INS_punpckldq,      INS_punpckldq,      INS_punpcklqdq,     INS_punpcklqdq,     INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2_Xor,                                        "Xor",                                          AVX2,         -1,              32,           2,     {INS_pxor,              INS_pxor,           INS_pxor,           INS_pxor,           INS_pxor,           INS_pxor,           INS_pxor,           INS_pxor,           INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-
-// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+HARDWARE_INTRINSIC(AVX2,            Abs,                                        32,              1,     {INS_pabsb,             INS_pabsb,              INS_pabsw,              INS_pabsw,              INS_pabsd,              INS_pabsd,              INS_paddq,              INS_paddq,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2,            Add,                                        32,              2,     {INS_paddb,             INS_paddb,              INS_paddw,              INS_paddw,              INS_paddd,              INS_paddd,              INS_paddq,              INS_paddq,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            AddSaturate,                                32,              2,     {INS_paddsb,            INS_paddusb,            INS_paddsw,             INS_paddusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            AlignRight,                                 32,              3,     {INS_palignr,           INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_palignr,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            And,                                        32,              2,     {INS_pand,              INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            AndNot,                                     32,              2,     {INS_pandn,             INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            Average,                                    32,              2,     {INS_invalid,           INS_pavgb,              INS_invalid,            INS_pavgw,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            Blend,                                       0,              3,     {INS_invalid,           INS_invalid,            INS_pblendw,            INS_pblendw,            INS_vpblendd,           INS_vpblendd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_UnfixedSIMDSize|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            BlendVariable,                              32,              3,     {INS_vpblendvb,         INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            BroadcastScalarToVector128,                 16,              1,     {INS_vpbroadcastb,      INS_vpbroadcastb,       INS_vpbroadcastw,       INS_vpbroadcastw,       INS_vpbroadcastd,       INS_vpbroadcastd,       INS_vpbroadcastq,       INS_vpbroadcastq,       INS_vbroadcastss,       INS_movddup},           HW_Category_SIMDScalar,             HW_Flag_MaybeMemoryLoad)
+HARDWARE_INTRINSIC(AVX2,            BroadcastScalarToVector256,                 32,              1,     {INS_vpbroadcastb,      INS_vpbroadcastb,       INS_vpbroadcastw,       INS_vpbroadcastw,       INS_vpbroadcastd,       INS_vpbroadcastd,       INS_vpbroadcastq,       INS_vpbroadcastq,       INS_vbroadcastss,       INS_vbroadcastsd},      HW_Category_SIMDScalar,             HW_Flag_MaybeMemoryLoad)
+HARDWARE_INTRINSIC(AVX2,            BroadcastVector128ToVector256,              32,              1,     {INS_vbroadcasti128,    INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_vbroadcasti128,     INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            CompareEqual,                               32,              2,     {INS_pcmpeqb,           INS_pcmpeqb,            INS_pcmpeqw,            INS_pcmpeqw,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqq,            INS_pcmpeqq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            CompareGreaterThan,                         32,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            ExtractVector128,                           32,              2,     {INS_vextracti128,      INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            ConvertToInt32,                             32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2,            ConvertToUInt32,                            32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int16,                    32,              1,     {INS_pmovsxbw,          INS_pmovzxbw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int32,                    32,              1,     {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int64,                    32,              1,     {INS_pmovsxbq,          INS_pmovzxbq,           INS_pmovsxwq,           INS_pmovzxwq,           INS_pmovsxdq,           INS_pmovzxdq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX2,            GatherVector128,                            16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2,            GatherVector256,                            32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2,            GatherMaskVector128,                        16,              5,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2,            GatherMaskVector256,                        32,              5,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpgatherdd,         INS_vpgatherdd,         INS_vpgatherdq,         INS_vpgatherdq,         INS_vgatherdps,         INS_vgatherdpd},        HW_Category_IMM,                    HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2,            HorizontalAdd,                              32,              2,     {INS_invalid,           INS_invalid,            INS_phaddw,             INS_invalid,            INS_phaddd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            HorizontalAddSaturate,                      32,              2,     {INS_invalid,           INS_invalid,            INS_phaddsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            HorizontalSubtract,                         32,              2,     {INS_invalid,           INS_invalid,            INS_phsubw,             INS_invalid,            INS_phsubd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            HorizontalSubtractSaturate,                 32,              2,     {INS_invalid,           INS_invalid,            INS_phsubsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            InsertVector128,                            32,              3,     {INS_vinserti128,       INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            LoadAlignedVector256NonTemporal,            32,              1,     {INS_movntdqa,          INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            MaskLoad,                                    0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmaskmovd,         INS_vpmaskmovd,         INS_vpmaskmovq,         INS_vpmaskmovq,         INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2,            MaskStore,                                   0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmaskmovd,         INS_vpmaskmovd,         INS_vpmaskmovq,         INS_vpmaskmovq,         INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVX2,            Max,                                        32,              2,     {INS_pmaxsb,            INS_pmaxub,             INS_pmaxsw,             INS_pmaxuw,             INS_pmaxsd,             INS_pmaxud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            Min,                                        32,              2,     {INS_pminsb,            INS_pminub,             INS_pminsw,             INS_pminuw,             INS_pminsd,             INS_pminud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            MoveMask,                                   32,              1,     {INS_pmovmskb,          INS_pmovmskb,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX2,            Multiply,                                   32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pmuldq,             INS_pmuludq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            MultipleSumAbsoluteDifferences,             32,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_mpsadbw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            MultiplyAddAdjacent,                        32,              2,     {INS_invalid,           INS_invalid,            INS_pmaddubsw,          INS_invalid,            INS_pmaddwd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            MultiplyHigh,                               32,              2,     {INS_invalid,           INS_invalid,            INS_pmulhw,             INS_pmulhuw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            MultiplyHighRoundScale,                     32,              2,     {INS_invalid,           INS_invalid,            INS_pmulhrsw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            MultiplyLow,                                32,              2,     {INS_invalid,           INS_invalid,            INS_pmullw,             INS_pmullw,             INS_pmulld,             INS_pmulld,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            Or,                                         32,              2,     {INS_por,               INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_por,                INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2,            Permute2x128,                               32,              3,     {INS_vperm2i128,        INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_vperm2i128,         INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            Permute4x64,                                32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermq,             INS_vpermq,             INS_invalid,            INS_vpermpd},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            PermuteVar8x32,                             32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermd,             INS_vpermd,             INS_invalid,            INS_invalid,            INS_vpermps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(AVX2,            PackSignedSaturate,                         32,              2,     {INS_packsswb,          INS_invalid,            INS_packssdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            PackUnsignedSaturate,                       32,              2,     {INS_invalid,           INS_packuswb,           INS_invalid,            INS_packusdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogical,                           32,              2,     {INS_invalid,           INS_invalid,            INS_psllw,              INS_psllw,              INS_pslld,              INS_pslld,              INS_psllq,              INS_psllq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogical128BitLane,                 32,              2,     {INS_pslldq,            INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogicalVariable,                    0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsllvd,            INS_vpsllvd,            INS_vpsllvq,            INS_vpsllvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2,            ShiftRightArithmetic,                       32,              2,     {INS_invalid,           INS_invalid,            INS_psraw,              INS_invalid,            INS_psrad,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            ShiftRightArithmeticVariable,                0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsravd,            INS_vpsravd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2,            ShiftRightLogical,                          32,              2,     {INS_invalid,           INS_invalid,            INS_psrlw,              INS_psrlw,              INS_psrld,              INS_psrld,              INS_psrlq,              INS_psrlq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            ShiftRightLogical128BitLane,                32,              2,     {INS_psrldq,            INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            ShiftRightLogicalVariable,                   0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsrlvd,            INS_vpsrlvd,            INS_vpsrlvq,            INS_vpsrlvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2,            Shuffle,                                    32,              2,     {INS_pshufb,            INS_pshufb,             INS_invalid,            INS_invalid,            INS_pshufd,             INS_pshufd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM)
+HARDWARE_INTRINSIC(AVX2,            ShuffleHigh,                                32,              2,     {INS_invalid,           INS_invalid,            INS_pshufhw,            INS_pshufhw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            ShuffleLow,                                 32,              2,     {INS_invalid,           INS_invalid,            INS_pshuflw,            INS_pshuflw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            Sign,                                       32,              2,     {INS_psignb,            INS_invalid,            INS_psignw,             INS_invalid,            INS_psignd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            SumAbsoluteDifferences,                     32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_psadbw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            Subtract,                                   32,              2,     {INS_psubb,             INS_psubb,              INS_psubw,              INS_psubw,              INS_psubd,              INS_psubd,              INS_psubq,              INS_psubq,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            SubtractSaturate,                           32,              2,     {INS_psubsb,            INS_psubusb,            INS_psubsw,             INS_psubusw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            UnpackHigh,                                 32,              2,     {INS_punpckhbw,         INS_punpckhbw,          INS_punpckhwd,          INS_punpckhwd,          INS_punpckhdq,          INS_punpckhdq,          INS_punpckhqdq,         INS_punpckhqdq,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            UnpackLow,                                  32,              2,     {INS_punpcklbw,         INS_punpcklbw,          INS_punpcklwd,          INS_punpcklwd,          INS_punpckldq,          INS_punpckldq,          INS_punpcklqdq,         INS_punpcklqdq,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            Xor,                                        32,              2,     {INS_pxor,              INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  AES Intrinsics
-HARDWARE_INTRINSIC(AES_Decrypt,                                     "Decrypt",                                      AES,          -1,              16,           2,     {INS_invalid,           INS_aesdec,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AES_DecryptLast,                                 "DecryptLast",                                  AES,          -1,              16,           2,     {INS_invalid,           INS_aesdeclast,     INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AES_Encrypt,                                     "Encrypt",                                      AES,          -1,              16,           2,     {INS_invalid,           INS_aesenc,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AES_EncryptLast,                                 "EncryptLast",                                  AES,          -1,              16,           2,     {INS_invalid,           INS_aesenclast,     INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AES_InverseMixColumns,                           "InverseMixColumns",                            AES,          -1,              16,           1,     {INS_invalid,           INS_aesimc,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AES_KeygenAssist,                                "KeygenAssist",                                 AES,          -1,              16,           2,     {INS_invalid,           INS_aeskeygenassist,INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AES,             Decrypt,                                    16,              2,     {INS_invalid,           INS_aesdec,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AES,             DecryptLast,                                16,              2,     {INS_invalid,           INS_aesdeclast,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AES,             Encrypt,                                    16,              2,     {INS_invalid,           INS_aesenc,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AES,             EncryptLast,                                16,              2,     {INS_invalid,           INS_aesenclast,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AES,             InverseMixColumns,                          16,              1,     {INS_invalid,           INS_aesimc,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AES,             KeygenAssist,                               16,              2,     {INS_invalid,           INS_aeskeygenassist,    INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  BMI1 Intrinsics
-HARDWARE_INTRINSIC(BMI1_AndNot,                                     "AndNot",                                       BMI1,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_andn,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_ExtractLowestSetBit,                        "ExtractLowestSetBit",                          BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsi,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_GetMaskUpToLowestSetBit,                    "GetMaskUpToLowestSetBit",                      BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsmsk,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_ResetLowestSetBit,                          "ResetLowestSetBit",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsr,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_TrailingZeroCount,                          "TrailingZeroCount",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
-HARDWARE_INTRINSIC(BMI1_BitFieldExtract,                            "BitFieldExtract",                              BMI1,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_bextr,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(BMI1,            AndNot,                                      0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_andn,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1,            ExtractLowestSetBit,                         0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_blsi,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1,            GetMaskUpToLowestSetBit,                     0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_blsmsk,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1,            ResetLowestSetBit,                           0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_blsr,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1,            TrailingZeroCount,                           0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_tzcnt,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(BMI1,            BitFieldExtract,                             0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_bextr,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns|HW_Flag_SpecialImport)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  BMI1 Intrinsics
-HARDWARE_INTRINSIC(BMI1_X64_AndNot,                                 "AndNot",                                       BMI1_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_andn,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_X64_ExtractLowestSetBit,                    "ExtractLowestSetBit",                          BMI1_X64,     -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blsi,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_X64_GetMaskUpToLowestSetBit,                "GetMaskUpToLowestSetBit",                      BMI1_X64,     -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blsmsk,         INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_X64_ResetLowestSetBit,                      "ResetLowestSetBit",                            BMI1_X64,     -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blsr,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_X64_TrailingZeroCount,                      "TrailingZeroCount",                            BMI1_X64,     -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
-HARDWARE_INTRINSIC(BMI1_X64_BitFieldExtract,                        "BitFieldExtract",                              BMI1_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_bextr,          INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(BMI1_X64,        AndNot,                                      0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_andn,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_X64,        ExtractLowestSetBit,                         0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blsi,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_X64,        GetMaskUpToLowestSetBit,                     0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blsmsk,             INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_X64,        ResetLowestSetBit,                           0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_blsr,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_X64,        TrailingZeroCount,                           0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_tzcnt,              INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(BMI1_X64,        BitFieldExtract,                             0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_bextr,              INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns|HW_Flag_SpecialImport)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  BMI2 Intrinsics
-HARDWARE_INTRINSIC(BMI2_ParallelBitDeposit,                         "ParallelBitDeposit",                           BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pdep,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI2_ParallelBitExtract,                         "ParallelBitExtract",                           BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pext,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI2_ZeroHighBits,                               "ZeroHighBits",                                 BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_bzhi,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(BMI2_MultiplyNoFlags,                            "MultiplyNoFlags",                              BMI2,         -1,               0,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mulx,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2,            ParallelBitDeposit,                          0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pdep,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2,            ParallelBitExtract,                          0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_pext,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2,            ZeroHighBits,                                0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_bzhi,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(BMI2,            MultiplyNoFlags,                             0,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mulx,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  BMI2 Intrinsics
-HARDWARE_INTRINSIC(BMI2_X64_ParallelBitDeposit,                     "ParallelBitDeposit",                           BMI2_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pdep,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI2_X64_ParallelBitExtract,                     "ParallelBitExtract",                           BMI2_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pext,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI2_X64_ZeroHighBits,                           "ZeroHighBits",                                 BMI2_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_bzhi,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(BMI2_X64_MultiplyNoFlags,                        "MultiplyNoFlags",                              BMI2_X64,     -1,               0,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mulx,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2_X64,        ParallelBitDeposit,                          0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pdep,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2_X64,        ParallelBitExtract,                          0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pext,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2_X64,        ZeroHighBits,                                0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_bzhi,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(BMI2_X64,        MultiplyNoFlags,                             0,             -1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulx,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  FMA Intrinsics
-HARDWARE_INTRINSIC(FMA_MultiplyAdd,                                 "MultiplyAdd",                                  FMA,          -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfmadd213ps,    INS_vfmadd213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA_MultiplyAddNegated,                          "MultiplyAddNegated",                           FMA,          -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfnmadd213ps,   INS_vfnmadd213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA_MultiplyAddNegatedScalar,                    "MultiplyAddNegatedScalar",                     FMA,          -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfnmadd213ss,   INS_vfnmadd213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(FMA_MultiplyAddScalar,                           "MultiplyAddScalar",                            FMA,          -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfmadd213ss,    INS_vfmadd213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(FMA_MultiplyAddSubtract,                         "MultiplyAddSubtract",                          FMA,          -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfmaddsub213ps, INS_vfmaddsub213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA_MultiplySubtract,                            "MultiplySubtract",                             FMA,          -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfmsub213ps,    INS_vfmsub213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA_MultiplySubtractAdd,                         "MultiplySubtractAdd",                          FMA,          -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfmsubadd213ps, INS_vfmsubadd213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA_MultiplySubtractNegated,                     "MultiplySubtractNegated",                      FMA,          -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfnmsub213ps,   INS_vfnmsub213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA_MultiplySubtractScalar,                      "MultiplySubtractScalar",                       FMA,          -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfmsub213ss,    INS_vfmsub213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(FMA_MultiplySubtractNegatedScalar,               "MultiplySubtractNegatedScalar",                FMA,          -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_vfnmsub213ss,   INS_vfnmsub213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(FMA,             MultiplyAdd,                                 0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ps,        INS_vfmadd213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(FMA,             MultiplyAddNegated,                          0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ps,       INS_vfnmadd213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(FMA,             MultiplyAddNegatedScalar,                   16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ss,       INS_vfnmadd213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(FMA,             MultiplyAddScalar,                          16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ss,        INS_vfmadd213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(FMA,             MultiplyAddSubtract,                         0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmaddsub213ps,     INS_vfmaddsub213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(FMA,             MultiplySubtract,                            0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ps,        INS_vfmsub213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(FMA,             MultiplySubtractAdd,                         0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsubadd213ps,     INS_vfmsubadd213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(FMA,             MultiplySubtractNegated,                     0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ps,       INS_vfnmsub213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(FMA,             MultiplySubtractScalar,                     16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ss,        INS_vfmsub213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(FMA,             MultiplySubtractNegatedScalar,              16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ss,       INS_vfnmsub213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  LZCNT Intrinsics
-HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount,                          "LeadingZeroCount",                             LZCNT,        -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_lzcnt,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(LZCNT,           LeadingZeroCount,                            0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_lzcnt,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  LZCNT Intrinsics
-HARDWARE_INTRINSIC(LZCNT_X64_LeadingZeroCount,                      "LeadingZeroCount",                             LZCNT_X64,    -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_lzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(LZCNT_X64,       LeadingZeroCount,                            0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_lzcnt,              INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  PCLMULQDQ Intrinsics
-HARDWARE_INTRINSIC(PCLMULQDQ_CarrylessMultiply,                     "CarrylessMultiply",                            PCLMULQDQ,    -1,               16,          3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pclmulqdq,      INS_pclmulqdq,      INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(PCLMULQDQ,       CarrylessMultiply,                          16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_pclmulqdq,          INS_pclmulqdq,          INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  POPCNT Intrinsics
-HARDWARE_INTRINSIC(POPCNT_PopCount,                                 "PopCount",                                     POPCNT,       -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_popcnt,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(POPCNT,          PopCount,                                    0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_popcnt,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
-//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
-//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  POPCNT Intrinsics
-HARDWARE_INTRINSIC(POPCNT_X64_PopCount,                             "PopCount",                                     POPCNT_X64,   -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_popcnt,         INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(POPCNT_X64,      PopCount,                                    0,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_popcnt,             INS_invalid,            INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 // Special intrinsics that are generated during lowering
-HARDWARE_INTRINSIC(SSE_COMISS,                                      "COMISS",                                       SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comiss,         INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_UCOMISS,                                     "UCOMISS",                                      SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomiss,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_COMISD,                                     "COMISD",                                       SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_UCOMISD,                                    "UCOMISD",                                      SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_PTEST,                                     "PTEST",                                        SSE41,        -1,              16,           2,     {INS_ptest,             INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX_PTEST,                                       "PTEST",                                        AVX,          -1,               0,           2,     {INS_ptest,             INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_ptest,          INS_vtestps,        INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             COMISS,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comiss,             INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE,             UCOMISS,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomiss,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            COMISD,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_comisd},            HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            UCOMISD,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_ucomisd},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41,           PTEST,                                      16,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX,             PTEST,                                       0,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 
 #endif // FEATURE_HW_INTRINSIC
 
index 7a26f60..286a079 100644 (file)
@@ -726,8 +726,8 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
-        case NI_Vector128_Count:
-        case NI_Vector256_Count:
+        case NI_Vector128_get_Count:
+        case NI_Vector256_get_Count:
         {
             assert(sig->numArgs == 0);
 
@@ -786,7 +786,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
-        case NI_Vector128_Zero:
+        case NI_Vector128_get_Zero:
         {
             assert(sig->numArgs == 0);
 
@@ -831,7 +831,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
-        case NI_Vector256_Zero:
+        case NI_Vector256_get_Zero:
         {
             assert(sig->numArgs == 0);
 
index f156d9b..f4969a5 100644 (file)
@@ -29,8 +29,8 @@ enum NamedIntrinsic : unsigned short
 
     NI_HW_INTRINSIC_START,
 #if defined(TARGET_XARCH)
-#define HARDWARE_INTRINSIC(id, name, isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
-    NI_##id,
+#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag)           \
+    NI_##isa##_##name,
 #include "hwintrinsiclistxarch.h"
 #elif defined(TARGET_ARM64)
 #define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag)           \
index 53e94a6..c17d18a 100644 (file)
@@ -168,8 +168,8 @@ ValueNumFuncDef(SIMD_##id, argCount, false, false, false)   // All of the SIMD i
 #define VNF_SIMD_FIRST VNF_SIMD_None
 
 #if defined(TARGET_XARCH)
-#define HARDWARE_INTRINSIC(id, name, isa, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
-ValueNumFuncDef(HWI_##id, argCount, false, false, false)   // All of the HARDWARE_INTRINSICS for x86/x64
+#define HARDWARE_INTRINSIC(isa, name, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
+ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false)   // All of the HARDWARE_INTRINSICS for x86/x64
 #include "hwintrinsiclistxarch.h"
 #define VNF_HWI_FIRST VNF_HWI_Vector128_As