Remove HW_Flag_UnfixedSIMDSize (#35594)
author Egor Chesakov <Egor.Chesakov@microsoft.com>
Thu, 30 Apr 2020 02:40:29 +0000 (19:40 -0700)
committer GitHub <noreply@github.com>
Thu, 30 Apr 2020 02:40:29 +0000 (19:40 -0700)
* Remove HW_Flag_UnfixedSIMDSize usages in hwintrinsiclistarm64.h

* Remove HW_Flag_UnfixedSIMDSize usages in hwintrinsiclistxarch.h

* Remove HW_Flag_UnfixedSIMDSize and HWIntrinsicInfo::HasFixedSimdSize() in hwintrinsic.cpp hwintrinsic.h

* Change HWIntrinsicInfo::simdSize type to int and remove all the casts in hwintrinsic.cpp hwintrinsic.h

src/coreclr/src/jit/hwintrinsic.cpp
src/coreclr/src/jit/hwintrinsic.h
src/coreclr/src/jit/hwintrinsiclistarm64.h
src/coreclr/src/jit/hwintrinsiclistxarch.h

index e376d06..0b88de5 100644 (file)
@@ -11,11 +11,11 @@ static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
 // clang-format off
 #if defined(TARGET_XARCH)
 #define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
-    {NI_##isa##_##name, #name, InstructionSet_##isa, static_cast<unsigned>(size), numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
+    {NI_##isa##_##name, #name, InstructionSet_##isa, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
 #include "hwintrinsiclistxarch.h"
 #elif defined (TARGET_ARM64)
 #define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
-    {NI_##isa##_##name, #name, InstructionSet_##isa, static_cast<unsigned>(size), numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
+    {NI_##isa##_##name, #name, InstructionSet_##isa, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
 #include "hwintrinsiclistarm64.h"
 #else
 #error Unsupported platform
@@ -349,9 +349,11 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler*   comp,
 //    get the SIMD size from the GenTreeHWIntrinsic node.
 unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig)
 {
-    if (HWIntrinsicInfo::HasFixedSimdSize(id))
+    unsigned simdSize = 0;
+
+    if (tryLookupSimdSize(id, &simdSize))
     {
-        return lookupSimdSize(id);
+        return simdSize;
     }
 
     CORINFO_CLASS_HANDLE typeHnd = nullptr;
@@ -371,7 +373,6 @@ unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI
         typeHnd = sig->retTypeSigClass;
     }
 
-    unsigned  simdSize = 0;
     var_types baseType = comp->getBaseTypeAndSizeOfSIMDType(typeHnd, &simdSize);
     assert((simdSize > 0) && (baseType != TYP_UNKNOWN));
     return simdSize;
index 0a87b7c..960b658 100644 (file)
@@ -57,67 +57,63 @@ enum HWIntrinsicFlag : unsigned int
     // - should be transformed in the compiler front-end, cannot reach CodeGen
     HW_Flag_NoCodeGen = 0x4,
 
-    // Unfixed SIMD-size
-    // - overloaded on multiple vector sizes (SIMD size in the table is unreliable)
-    HW_Flag_UnfixedSIMDSize = 0x8,
-
     // Multi-instruction
     // - that one intrinsic can generate multiple instructions
-    HW_Flag_MultiIns = 0x10,
+    HW_Flag_MultiIns = 0x8,
 
     // Select base type using the first argument type
-    HW_Flag_BaseTypeFromFirstArg = 0x20,
+    HW_Flag_BaseTypeFromFirstArg = 0x10,
 
     // Select base type using the second argument type
-    HW_Flag_BaseTypeFromSecondArg = 0x40,
+    HW_Flag_BaseTypeFromSecondArg = 0x20,
 
     // Indicates compFloatingPointUsed does not need to be set.
-    HW_Flag_NoFloatingPointUsed = 0x80,
+    HW_Flag_NoFloatingPointUsed = 0x40,
 
     // Maybe IMM
     // the intrinsic has either imm or Vector overloads
-    HW_Flag_MaybeIMM = 0x100,
+    HW_Flag_MaybeIMM = 0x80,
 
     // NoJmpTable IMM
     // the imm intrinsic does not need jumptable fallback when it gets non-const argument
-    HW_Flag_NoJmpTableIMM = 0x200,
+    HW_Flag_NoJmpTableIMM = 0x100,
 
     // Special codegen
     // the intrinsics need special rules in CodeGen,
     // but may be table-driven in the front-end
-    HW_Flag_SpecialCodeGen = 0x400,
+    HW_Flag_SpecialCodeGen = 0x200,
 
     // Special import
     // the intrinsics need special rules in importer,
     // but may be table-driven in the back-end
-    HW_Flag_SpecialImport = 0x800,
+    HW_Flag_SpecialImport = 0x400,
 
 // The below is for defining platform-specific flags
 #if defined(TARGET_XARCH)
     // Copy Upper bits
     // some SIMD scalar intrinsics need the semantics of copying upper bits from the source operand
-    HW_Flag_CopyUpperBits = 0x1000,
+    HW_Flag_CopyUpperBits = 0x800,
 
     // Maybe Memory Load/Store
     // - some intrinsics may have pointer overloads but without HW_Category_MemoryLoad/HW_Category_MemoryStore
-    HW_Flag_MaybeMemoryLoad  = 0x2000,
-    HW_Flag_MaybeMemoryStore = 0x4000,
+    HW_Flag_MaybeMemoryLoad  = 0x1000,
+    HW_Flag_MaybeMemoryStore = 0x2000,
 
     // No Read/Modify/Write Semantics
     // the intrinsic doesn't have read/modify/write semantics in two/three-operand form.
-    HW_Flag_NoRMWSemantics = 0x8000,
+    HW_Flag_NoRMWSemantics = 0x4000,
 
     // NoContainment
     // the intrinsic cannot be handled by comtainment,
     // all the intrinsic that have explicit memory load/store semantics should have this flag
-    HW_Flag_NoContainment = 0x10000,
+    HW_Flag_NoContainment = 0x8000,
 
 #elif defined(TARGET_ARM64)
     // The intrinsic has read/modify/write semantics in multiple-operands form.
-    HW_Flag_HasRMWSemantics = 0x1000,
+    HW_Flag_HasRMWSemantics = 0x800,
 
     // The intrinsic supports some sort of containment analysis.
-    HW_Flag_SupportsContainment = 0x2000,
+    HW_Flag_SupportsContainment = 0x1000,
 #else
 #error Unsupported platform
 #endif
@@ -254,7 +250,7 @@ struct HWIntrinsicInfo
     NamedIntrinsic         id;
     const char*            name;
     CORINFO_InstructionSet isa;
-    unsigned               simdSize;
+    int                    simdSize;
     int                    numArgs;
     instruction            ins[10];
     HWIntrinsicCategory    category;
@@ -469,9 +465,15 @@ struct HWIntrinsicInfo
     }
 #endif
 
-    static unsigned lookupSimdSize(NamedIntrinsic id)
+    static bool tryLookupSimdSize(NamedIntrinsic id, unsigned* pSimdSize)
     {
-        return lookup(id).simdSize;
+        bool succeeded = false;
+        if (lookup(id).simdSize != -1)
+        {
+            *pSimdSize = lookup(id).simdSize;
+            succeeded  = true;
+        }
+        return succeeded;
     }
 
     static int lookupNumArgs(NamedIntrinsic id)
@@ -519,12 +521,6 @@ struct HWIntrinsicInfo
         return (flags & HW_Flag_NoCodeGen) == 0;
     }
 
-    static bool HasFixedSimdSize(NamedIntrinsic id)
-    {
-        HWIntrinsicFlag flags = lookupFlags(id);
-        return (flags & HW_Flag_UnfixedSIMDSize) == 0;
-    }
-
     static bool GeneratesMultipleIns(NamedIntrinsic id)
     {
         HWIntrinsicFlag flags = lookupFlags(id);
index 3ed83db..2afaec5 100644 (file)
@@ -54,71 +54,71 @@ HARDWARE_INTRINSIC(Vector128,       get_Zero,                                  1
 //                                                                                                  {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  AdvSimd Intrinsics
-HARDWARE_INTRINSIC(AdvSimd,         Abs,                                       -1,           1,     {INS_abs,               INS_invalid,        INS_abs,            INS_invalid,        INS_abs,            INS_invalid,        INS_invalid,        INS_invalid,        INS_fabs,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AdvSimd,         Abs,                                       -1,           1,     {INS_abs,               INS_invalid,        INS_abs,            INS_invalid,        INS_abs,            INS_invalid,        INS_invalid,        INS_invalid,        INS_fabs,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd,         AbsScalar,                                  8,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fabs,           INS_fabs},              HW_Category_SIMDScalar,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,         AbsoluteCompareGreaterThan,                -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_facgt,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         AbsoluteCompareGreaterThanOrEqual,         -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_facge,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         AbsoluteCompareLessThan,                   -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_facgt,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd,         AbsoluteCompareLessThanOrEqual,            -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_facge,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd,         AbsoluteDifference,                        -1,           2,     {INS_sabd,              INS_uabd,           INS_sabd,           INS_uabd,           INS_sabd,           INS_uabd,           INS_invalid,        INS_invalid,        INS_fabd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AdvSimd,         AbsoluteDifferenceAdd,                     -1,           3,     {INS_saba,              INS_uaba,           INS_saba,           INS_uaba,           INS_saba,           INS_uaba,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_HasRMWSemantics)
-HARDWARE_INTRINSIC(AdvSimd,         Add,                                       -1,           2,     {INS_add,               INS_add,            INS_add,            INS_add,            INS_add,            INS_add,            INS_add,            INS_add,            INS_fadd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd,         AbsoluteCompareGreaterThan,                -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_facgt,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         AbsoluteCompareGreaterThanOrEqual,         -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_facge,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         AbsoluteCompareLessThan,                   -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_facgt,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AdvSimd,         AbsoluteCompareLessThanOrEqual,            -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_facge,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AdvSimd,         AbsoluteDifference,                        -1,           2,     {INS_sabd,              INS_uabd,           INS_sabd,           INS_uabd,           INS_sabd,           INS_uabd,           INS_invalid,        INS_invalid,        INS_fabd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AdvSimd,         AbsoluteDifferenceAdd,                     -1,           3,     {INS_saba,              INS_uaba,           INS_saba,           INS_uaba,           INS_saba,           INS_uaba,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(AdvSimd,         Add,                                       -1,           2,     {INS_add,               INS_add,            INS_add,            INS_add,            INS_add,            INS_add,            INS_add,            INS_add,            INS_fadd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd,         AddPairwise,                                8,           2,     {INS_addp,              INS_addp,           INS_addp,           INS_addp,           INS_addp,           INS_addp,           INS_invalid,        INS_invalid,        INS_faddp,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd,         AddScalar,                                  8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_add,            INS_add,            INS_fadd,           INS_fadd},              HW_Category_SIMDScalar,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd,         And,                                       -1,           2,     {INS_and,               INS_and,            INS_and,            INS_and,            INS_and,            INS_and,            INS_and,            INS_and,            INS_and,            INS_and},               HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd,         BitwiseClear,                              -1,           2,     {INS_bic,               INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic},               HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         BitwiseSelect,                             -1,           3,     {INS_bsl,               INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl},               HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd,         CompareEqual,                              -1,           2,     {INS_cmeq,              INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_invalid,        INS_invalid,        INS_fcmeq,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         CompareGreaterThan,                        -1,           2,     {INS_cmgt,              INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_invalid,        INS_invalid,        INS_fcmgt,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         CompareGreaterThanOrEqual,                 -1,           2,     {INS_cmge,              INS_cmhs,           INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_invalid,        INS_invalid,        INS_fcmge,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         CompareLessThan,                           -1,           2,     {INS_cmgt,              INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_invalid,        INS_invalid,        INS_fcmgt,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd,         CompareLessThanOrEqual,                    -1,           2,     {INS_cmge,              INS_cmhs,           INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_invalid,        INS_invalid,        INS_fcmge,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd,         CompareTest,                               -1,           2,     {INS_cmtst,             INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_invalid,        INS_invalid,        INS_cmtst,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd,         And,                                       -1,           2,     {INS_and,               INS_and,            INS_and,            INS_and,            INS_and,            INS_and,            INS_and,            INS_and,            INS_and,            INS_and},               HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         BitwiseClear,                              -1,           2,     {INS_bic,               INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic,            INS_bic},               HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         BitwiseSelect,                             -1,           3,     {INS_bsl,               INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl},               HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AdvSimd,         CompareEqual,                              -1,           2,     {INS_cmeq,              INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_invalid,        INS_invalid,        INS_fcmeq,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         CompareGreaterThan,                        -1,           2,     {INS_cmgt,              INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_invalid,        INS_invalid,        INS_fcmgt,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         CompareGreaterThanOrEqual,                 -1,           2,     {INS_cmge,              INS_cmhs,           INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_invalid,        INS_invalid,        INS_fcmge,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         CompareLessThan,                           -1,           2,     {INS_cmgt,              INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_invalid,        INS_invalid,        INS_fcmgt,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AdvSimd,         CompareLessThanOrEqual,                    -1,           2,     {INS_cmge,              INS_cmhs,           INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_invalid,        INS_invalid,        INS_fcmge,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AdvSimd,         CompareTest,                               -1,           2,     {INS_cmtst,             INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_invalid,        INS_invalid,        INS_cmtst,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd,         DivideScalar,                               8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fdiv,           INS_fdiv},              HW_Category_SIMDScalar,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,         Extract,                                   -1,           2,     {INS_smov,              INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},               HW_Category_IMM,                    HW_Flag_SupportsContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(AdvSimd,         Extract,                                   -1,           2,     {INS_smov,              INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},               HW_Category_IMM,                    HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd,         ExtractAndNarrowHigh,                      16,           2,     {INS_xtn2,              INS_xtn2,           INS_xtn2,           INS_xtn2,           INS_xtn2,           INS_xtn2,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_HasRMWSemantics)
 HARDWARE_INTRINSIC(AdvSimd,         ExtractAndNarrowLow,                        8,           1,     {INS_xtn,               INS_xtn,            INS_xtn,            INS_xtn,            INS_xtn,            INS_xtn,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd,         ExtractVector64,                            8,           3,     {INS_ext,               INS_ext,            INS_ext,            INS_ext,            INS_ext,            INS_ext,            INS_invalid,        INS_invalid,        INS_ext,            INS_invalid},           HW_Category_IMM,                    HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd,         ExtractVector128,                          16,           3,     {INS_ext,               INS_ext,            INS_ext,            INS_ext,            INS_ext,            INS_ext,            INS_ext,            INS_ext,            INS_ext,            INS_ext},               HW_Category_IMM,                    HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd,         FusedMultiplyAdd,                          -1,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmla,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(AdvSimd,         FusedMultiplyAdd,                          -1,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmla,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_HasRMWSemantics)
 HARDWARE_INTRINSIC(AdvSimd,         FusedMultiplyAddScalar,                     8,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmadd,          INS_fmadd},             HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd,         FusedMultiplyAddNegatedScalar,              8,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fnmadd,         INS_fnmadd},            HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd,         FusedMultiplySubtract,                     -1,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmls,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(AdvSimd,         FusedMultiplySubtract,                     -1,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmls,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_HasRMWSemantics)
 HARDWARE_INTRINSIC(AdvSimd,         FusedMultiplySubtractScalar,                8,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmsub,          INS_fmsub},             HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd,         FusedMultiplySubtractNegatedScalar,         8,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fnmsub,         INS_fnmsub},            HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(AdvSimd,         Insert,                                    -1,           3,     {INS_ins,               INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins},               HW_Category_IMM,                    HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize|HW_Flag_HasRMWSemantics)
-HARDWARE_INTRINSIC(AdvSimd,         LeadingSignCount,                          -1,           1,     {INS_cls,               INS_invalid,        INS_cls,            INS_invalid,        INS_cls,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         LeadingZeroCount,                          -1,           1,     {INS_clz,               INS_clz,            INS_clz,            INS_clz,            INS_clz,            INS_clz,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd,         Insert,                                    -1,           3,     {INS_ins,               INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins,            INS_ins},               HW_Category_IMM,                    HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(AdvSimd,         LeadingSignCount,                          -1,           1,     {INS_cls,               INS_invalid,        INS_cls,            INS_invalid,        INS_cls,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         LeadingZeroCount,                          -1,           1,     {INS_clz,               INS_clz,            INS_clz,            INS_clz,            INS_clz,            INS_clz,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd,         LoadVector64,                               8,           1,     {INS_ld1,               INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1},               HW_Category_MemoryLoad,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd,         LoadVector128,                             16,           1,     {INS_ld1,               INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1,            INS_ld1},               HW_Category_MemoryLoad,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,         Max,                                       -1,           2,     {INS_smax,              INS_umax,           INS_smax,           INS_umax,           INS_smax,           INS_umax,           INS_invalid,        INS_invalid,        INS_fmax,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd,         MaxNumber,                                 -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnm,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         Max,                                       -1,           2,     {INS_smax,              INS_umax,           INS_smax,           INS_umax,           INS_smax,           INS_umax,           INS_invalid,        INS_invalid,        INS_fmax,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         MaxNumber,                                 -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnm,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd,         MaxNumberScalar,                            8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnm,         INS_fmaxnm},            HW_Category_SIMDScalar,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd,         MaxPairwise,                                8,           2,     {INS_smaxp,             INS_umaxp,          INS_smaxp,          INS_umaxp,          INS_smaxp,          INS_umaxp,          INS_invalid,        INS_invalid,        INS_fmaxp,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,         Min,                                       -1,           2,     {INS_smin,              INS_umin,           INS_smin,           INS_umin,           INS_smin,           INS_umin,           INS_invalid,        INS_invalid,        INS_fmin,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd,         MinNumber,                                 -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnm,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         Min,                                       -1,           2,     {INS_smin,              INS_umin,           INS_smin,           INS_umin,           INS_smin,           INS_umin,           INS_invalid,        INS_invalid,        INS_fmin,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         MinNumber,                                 -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnm,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd,         MinNumberScalar,                            8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnm,         INS_fminnm},            HW_Category_SIMDScalar,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd,         MinPairwise,                                8,           2,     {INS_sminp,             INS_uminp,          INS_sminp,          INS_uminp,          INS_sminp,          INS_uminp,          INS_invalid,        INS_invalid,        INS_fminp,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,         Multiply,                                  -1,           2,     {INS_mul,               INS_mul,            INS_mul,            INS_mul,            INS_mul,            INS_mul,            INS_invalid,        INS_invalid,        INS_fmul,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         Multiply,                                  -1,           2,     {INS_mul,               INS_mul,            INS_mul,            INS_mul,            INS_mul,            INS_mul,            INS_invalid,        INS_invalid,        INS_fmul,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd,         MultiplyScalar,                             8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmul,           INS_fmul},              HW_Category_SIMDScalar,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd,         MultiplyAdd,                               -1,           3,     {INS_mla,               INS_mla,            INS_mla,            INS_mla,            INS_mla,            INS_mla,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_HasRMWSemantics)
-HARDWARE_INTRINSIC(AdvSimd,         MultiplySubtract,                          -1,           3,     {INS_mls,               INS_mls,            INS_mls,            INS_mls,            INS_mls,            INS_mls,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_HasRMWSemantics)
-HARDWARE_INTRINSIC(AdvSimd,         Negate,                                    -1,           1,     {INS_neg,               INS_invalid,        INS_neg,            INS_invalid,        INS_neg,            INS_invalid,        INS_invalid,        INS_invalid,        INS_fneg,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd,         MultiplyAdd,                               -1,           3,     {INS_mla,               INS_mla,            INS_mla,            INS_mla,            INS_mla,            INS_mla,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(AdvSimd,         MultiplySubtract,                          -1,           3,     {INS_mls,               INS_mls,            INS_mls,            INS_mls,            INS_mls,            INS_mls,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_HasRMWSemantics)
+HARDWARE_INTRINSIC(AdvSimd,         Negate,                                    -1,           1,     {INS_neg,               INS_invalid,        INS_neg,            INS_invalid,        INS_neg,            INS_invalid,        INS_invalid,        INS_invalid,        INS_fneg,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd,         NegateScalar,                               8,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fneg,           INS_fneg},              HW_Category_SIMDScalar,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,         Not,                                       -1,           1,     {INS_mvn,               INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn},               HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         Or,                                        -1,           2,     {INS_orr,               INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr},               HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd,         OrNot,                                     -1,           2,     {INS_orn,               INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn},               HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         PolynomialMultiply,                        -1,           2,     {INS_pmul,              INS_pmul,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd,         PopCount,                                  -1,           1,     {INS_cnt,               INS_cnt,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         ReciprocalEstimate,                        -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_urecpe,         INS_invalid,        INS_invalid,        INS_frecpe,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         ReciprocalSquareRootEstimate,              -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ursqrte,        INS_invalid,        INS_invalid,        INS_frsqrte,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd,         ReciprocalSquareRootStep,                  -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frsqrts,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd,         ReciprocalStep,                            -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frecps,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         Not,                                       -1,           1,     {INS_mvn,               INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn,            INS_mvn},               HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         Or,                                        -1,           2,     {INS_orr,               INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr,            INS_orr},               HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         OrNot,                                     -1,           2,     {INS_orn,               INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn,            INS_orn},               HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         PolynomialMultiply,                        -1,           2,     {INS_pmul,              INS_pmul,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         PopCount,                                  -1,           1,     {INS_cnt,               INS_cnt,            INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         ReciprocalEstimate,                        -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_urecpe,         INS_invalid,        INS_invalid,        INS_frecpe,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         ReciprocalSquareRootEstimate,              -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_ursqrte,        INS_invalid,        INS_invalid,        INS_frsqrte,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,         ReciprocalSquareRootStep,                  -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frsqrts,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         ReciprocalStep,                            -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frecps,         INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd,         SqrtScalar,                                 8,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fsqrt,          INS_fsqrt},             HW_Category_SIMDScalar,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,         Store,                                     -1,           2,     {INS_st1,               INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1},               HW_Category_MemoryStore,            HW_Flag_UnfixedSIMDSize|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(AdvSimd,         Subtract,                                  -1,           2,     {INS_sub,               INS_sub,            INS_sub,            INS_sub,            INS_sub,            INS_sub,            INS_sub,            INS_sub,            INS_fsub,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd,         Store,                                     -1,           2,     {INS_st1,               INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1,            INS_st1},               HW_Category_MemoryStore,            HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AdvSimd,         Subtract,                                  -1,           2,     {INS_sub,               INS_sub,            INS_sub,            INS_sub,            INS_sub,            INS_sub,            INS_sub,            INS_sub,            INS_fsub,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd,         SubtractScalar,                             8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sub,            INS_sub,            INS_fsub,           INS_fsub},              HW_Category_SIMDScalar,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,         Xor,                                       -1,           2,     {INS_eor,               INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor},               HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd,         Xor,                                       -1,           2,     {INS_eor,               INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor,            INS_eor},               HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                            SIMD size       NumArg                                                                                                     Instructions                                                                                                     Category                            Flags
@@ -138,9 +138,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64,   AbsoluteCompareLessThanScalar,
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   AbsoluteDifference,                        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fabd},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   AbsoluteDifferenceScalar,                   8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fabd,           INS_fabd},              HW_Category_SIMDScalar,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   Add,                                       16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fadd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   AddAcross,                                 -1,           1,     {INS_addv,              INS_addv,           INS_addv,           INS_addv,           INS_addv,           INS_addv,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   AddAcross,                                 -1,           1,     {INS_addv,              INS_addv,           INS_addv,           INS_addv,           INS_addv,           INS_addv,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   AddPairwise,                               16,           2,     {INS_addp,              INS_addp,           INS_addp,           INS_addp,           INS_addp,           INS_addp,           INS_addp,           INS_addp,           INS_faddp,          INS_faddp},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   AddPairwiseScalar,                         -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addp,           INS_addp,           INS_faddp,          INS_faddp},             HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   AddPairwiseScalar,                         -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addp,           INS_addp,           INS_faddp,          INS_faddp},             HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   CompareEqual,                              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmeq,           INS_cmeq,           INS_invalid,        INS_fcmeq},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   CompareEqualScalar,                         8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmeq,           INS_cmeq,           INS_fcmeq,          INS_fcmeq},             HW_Category_SIMDScalar,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   CompareGreaterThan,                        16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmgt,           INS_cmhi,           INS_invalid,        INS_fcmgt},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -153,29 +153,29 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64,   CompareLessThanOrEqualScalar,
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   CompareLessThanScalar,                      8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmgt,           INS_cmhi,           INS_fcmgt,          INS_fcmgt},             HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   CompareTest,                               16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmtst,          INS_cmtst,          INS_invalid,        INS_cmtst},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   CompareTestScalar,                          8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmtst,          INS_cmtst,          INS_invalid,        INS_cmtst},             HW_Category_SIMDScalar,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   Divide,                                    -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fdiv,           INS_fdiv},              HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   Divide,                                    -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fdiv,           INS_fdiv},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   FusedMultiplyAdd,                          16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmla},              HW_Category_SimpleSIMD,             HW_Flag_HasRMWSemantics)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   FusedMultiplySubtract,                     16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmls},              HW_Category_SimpleSIMD,             HW_Flag_HasRMWSemantics)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   Max,                                       16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmax},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxAcross,                                 -1,           1,     {INS_smaxv,             INS_umaxv,          INS_smaxv,          INS_umaxv,          INS_smaxv,          INS_umaxv,          INS_invalid,        INS_invalid,        INS_fmaxv,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxAcross,                                 -1,           1,     {INS_smaxv,             INS_umaxv,          INS_smaxv,          INS_umaxv,          INS_smaxv,          INS_umaxv,          INS_invalid,        INS_invalid,        INS_fmaxv,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxNumber,                                 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnm},            HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxNumberAcross,                           16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnmv,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxNumberPairwise,                         -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnmp,        INS_fmaxnmp},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxNumberPairwiseScalar,                   -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnmp,        INS_fmaxnmp},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxNumberPairwise,                         -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnmp,        INS_fmaxnmp},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxNumberPairwiseScalar,                   -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxnmp,        INS_fmaxnmp},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxPairwise,                               16,           2,     {INS_smaxp,             INS_umaxp,          INS_smaxp,          INS_umaxp,          INS_smaxp,          INS_umaxp,          INS_invalid,        INS_invalid,        INS_fmaxp,          INS_fmaxp},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxPairwiseScalar,                         -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxp,          INS_fmaxp},             HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxPairwiseScalar,                         -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmaxp,          INS_fmaxp},             HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MaxScalar,                                  8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmax,           INS_fmax},              HW_Category_SIMDScalar,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   Min,                                       16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmin},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinAcross,                                 -1,           1,     {INS_sminv,             INS_uminv,          INS_sminv,          INS_uminv,          INS_sminv,          INS_uminv,          INS_invalid,        INS_invalid,        INS_fminv,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinAcross,                                 -1,           1,     {INS_sminv,             INS_uminv,          INS_sminv,          INS_uminv,          INS_sminv,          INS_uminv,          INS_invalid,        INS_invalid,        INS_fminv,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinNumber,                                 16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnm},            HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinNumberAcross,                           16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnmv,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinNumberPairwise,                         -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnmp,        INS_fminnmp},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinNumberPairwiseScalar,                   -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnmp,        INS_fminnmp},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinNumberPairwise,                         -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnmp,        INS_fminnmp},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinNumberPairwiseScalar,                   -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminnmp,        INS_fminnmp},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinPairwise,                               16,           2,     {INS_sminp,             INS_uminp,          INS_sminp,          INS_uminp,          INS_sminp,          INS_uminp,          INS_invalid,        INS_invalid,        INS_fminp,          INS_fminp},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinPairwiseScalar,                         -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminp,          INS_fminp},             HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinPairwiseScalar,                         -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fminp,          INS_fminp},             HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MinScalar,                                  8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmin,           INS_fmin},              HW_Category_SIMDScalar,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   Multiply,                                  16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmul},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   MultiplyExtended,                          -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmulx,          INS_fmulx},             HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   MultiplyExtended,                          -1,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmulx,          INS_fmulx},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   MultiplyExtendedScalar,                     8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fmulx,          INS_fmulx},             HW_Category_SIMDScalar,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   Negate,                                    16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_neg,            INS_invalid,        INS_invalid,        INS_fneg},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   NegateScalar,                               8,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_neg,            INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoFlag)
@@ -188,15 +188,15 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64,   ReciprocalSquareRootStep,                  1
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   ReciprocalSquareRootStepScalar,             8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frsqrts,        INS_frsqrts},           HW_Category_SIMDScalar,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   ReciprocalStep,                            16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frecps},            HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   ReciprocalStepScalar,                       8,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frecps,         INS_frecps},            HW_Category_SIMDScalar,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   ReverseElementBits,                        -1,           1,     {INS_rbit,              INS_rbit,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   Sqrt,                                      -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fsqrt,          INS_fsqrt},             HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   ReverseElementBits,                        -1,           1,     {INS_rbit,              INS_rbit,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   Sqrt,                                      -1,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fsqrt,          INS_fsqrt},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd_Arm64,   Subtract,                                  16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_fsub},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   TransposeEven,                             -1,           2,     {INS_trn1,              INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1},              HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   TransposeOdd,                              -1,           2,     {INS_trn2,              INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2},              HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   UnzipEven,                                 -1,           2,     {INS_uzp1,              INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1},              HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   UnzipOdd,                                  -1,           2,     {INS_uzp2,              INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2},              HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   ZipHigh,                                   -1,           2,     {INS_zip2,              INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2},              HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AdvSimd_Arm64,   ZipLow,                                    -1,           2,     {INS_zip1,              INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1},              HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   TransposeEven,                             -1,           2,     {INS_trn1,              INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1,           INS_trn1},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   TransposeOdd,                              -1,           2,     {INS_trn2,              INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2,           INS_trn2},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   UnzipEven,                                 -1,           2,     {INS_uzp1,              INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1,           INS_uzp1},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   UnzipOdd,                                  -1,           2,     {INS_uzp2,              INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2,           INS_uzp2},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   ZipHigh,                                   -1,           2,     {INS_zip2,              INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2,           INS_zip2},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd_Arm64,   ZipLow,                                    -1,           2,     {INS_zip1,              INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1,           INS_zip1},              HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                            SIMD size       NumArg                                                                                                     Instructions                                                                                                     Category                            Flags
index ab9a6d9..49240a4 100644 (file)
@@ -464,14 +464,14 @@ HARDWARE_INTRINSIC(AVX,             LoadDquVector256,
 HARDWARE_INTRINSIC(AVX,             LoadVector256,                              32,              1,     {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_MemoryLoad,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX,             Max,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_maxps,              INS_maxpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX,             Min,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_minps,              INS_minpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX,             MaskLoad,                                    0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vmaskmovps,         INS_vmaskmovpd},        HW_Category_MemoryLoad,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX,             MaskStore,                                   0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vmaskmovps,         INS_vmaskmovpd},        HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVX,             MaskLoad,                                   -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vmaskmovps,         INS_vmaskmovpd},        HW_Category_MemoryLoad,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX,             MaskStore,                                  -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vmaskmovps,         INS_vmaskmovpd},        HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(AVX,             MoveMask,                                   32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movmskps,           INS_movmskpd},          HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX,             Multiply,                                   32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mulps,              INS_mulpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX,             Or,                                         32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_orps,               INS_orpd},              HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX,             Permute,                                     0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilps,          INS_vpermilpd},         HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX,             Permute,                                    -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilps,          INS_vpermilpd},         HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX,             Permute2x128,                               32,              3,     {INS_vperm2f128,        INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128,         INS_vperm2f128},        HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX,             PermuteVar,                                  0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilpsvar,       INS_vpermilpdvar},      HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX,             PermuteVar,                                 -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vpermilpsvar,       INS_vpermilpdvar},      HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             Reciprocal,                                 32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rcpps,              INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX,             ReciprocalSqrt,                             32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_rsqrtps,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX,             RoundCurrentDirection,                      32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_roundps,            INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
@@ -485,9 +485,9 @@ HARDWARE_INTRINSIC(AVX,             Store,
 HARDWARE_INTRINSIC(AVX,             StoreAligned,                               32,              2,     {INS_movdqa,            INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movdqa,             INS_movaps,             INS_movapd},            HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(AVX,             StoreAlignedNonTemporal,                    32,              2,     {INS_movntdq,           INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntdq,            INS_movntps,            INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(AVX,             Subtract,                                   32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_subps,              INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX,             TestC,                                       0,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX,             TestNotZAndNotC,                             0,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX,             TestZ,                                       0,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX,             TestC,                                      -1,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX,             TestNotZAndNotC,                            -1,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX,             TestZ,                                      -1,              2,     {INS_ptest,             INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_ptest,              INS_vtestps,            INS_vtestpd},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX,             UnpackHigh,                                 32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpckhps,           INS_unpckhpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             UnpackLow,                                  32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_unpcklps,           INS_unpcklpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX,             Xor,                                        32,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_xorps,              INS_xorpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
@@ -504,7 +504,7 @@ HARDWARE_INTRINSIC(AVX2,            AlignRight,
 HARDWARE_INTRINSIC(AVX2,            And,                                        32,              2,     {INS_pand,              INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_pand,               INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2,            AndNot,                                     32,              2,     {INS_pandn,             INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_pandn,              INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            Average,                                    32,              2,     {INS_invalid,           INS_pavgb,              INS_invalid,            INS_pavgw,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX2,            Blend,                                       0,              3,     {INS_invalid,           INS_invalid,            INS_pblendw,            INS_pblendw,            INS_vpblendd,           INS_vpblendd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_UnfixedSIMDSize|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2,            Blend,                                      -1,              3,     {INS_invalid,           INS_invalid,            INS_pblendw,            INS_pblendw,            INS_vpblendd,           INS_vpblendd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2,            BlendVariable,                              32,              3,     {INS_vpblendvb,         INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_vpblendvb,          INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            BroadcastScalarToVector128,                 16,              1,     {INS_vpbroadcastb,      INS_vpbroadcastb,       INS_vpbroadcastw,       INS_vpbroadcastw,       INS_vpbroadcastd,       INS_vpbroadcastd,       INS_vpbroadcastq,       INS_vpbroadcastq,       INS_vbroadcastss,       INS_movddup},           HW_Category_SIMDScalar,             HW_Flag_MaybeMemoryLoad)
 HARDWARE_INTRINSIC(AVX2,            BroadcastScalarToVector256,                 32,              1,     {INS_vpbroadcastb,      INS_vpbroadcastb,       INS_vpbroadcastw,       INS_vpbroadcastw,       INS_vpbroadcastd,       INS_vpbroadcastd,       INS_vpbroadcastq,       INS_vpbroadcastq,       INS_vbroadcastss,       INS_vbroadcastsd},      HW_Category_SIMDScalar,             HW_Flag_MaybeMemoryLoad)
@@ -527,8 +527,8 @@ HARDWARE_INTRINSIC(AVX2,            HorizontalSubtract,
 HARDWARE_INTRINSIC(AVX2,            HorizontalSubtractSaturate,                 32,              2,     {INS_invalid,           INS_invalid,            INS_phsubsw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            InsertVector128,                            32,              3,     {INS_vinserti128,       INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_vinserti128,        INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2,            LoadAlignedVector256NonTemporal,            32,              1,     {INS_movntdqa,          INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_movntdqa,           INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX2,            MaskLoad,                                    0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmaskmovd,         INS_vpmaskmovd,         INS_vpmaskmovq,         INS_vpmaskmovq,         INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX2,            MaskStore,                                   0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmaskmovd,         INS_vpmaskmovd,         INS_vpmaskmovq,         INS_vpmaskmovq,         INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVX2,            MaskLoad,                                   -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmaskmovd,         INS_vpmaskmovd,         INS_vpmaskmovq,         INS_vpmaskmovq,         INS_invalid,            INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AVX2,            MaskStore,                                  -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpmaskmovd,         INS_vpmaskmovd,         INS_vpmaskmovq,         INS_vpmaskmovq,         INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(AVX2,            Max,                                        32,              2,     {INS_pmaxsb,            INS_pmaxub,             INS_pmaxsw,             INS_pmaxuw,             INS_pmaxsd,             INS_pmaxud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2,            Min,                                        32,              2,     {INS_pminsb,            INS_pminub,             INS_pminsw,             INS_pminuw,             INS_pminsd,             INS_pminud,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2,            MoveMask,                                   32,              1,     {INS_pmovmskb,          INS_pmovmskb,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
@@ -546,12 +546,12 @@ HARDWARE_INTRINSIC(AVX2,            PackSignedSaturate,
 HARDWARE_INTRINSIC(AVX2,            PackUnsignedSaturate,                       32,              2,     {INS_invalid,           INS_packuswb,           INS_invalid,            INS_packusdw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogical,                           32,              2,     {INS_invalid,           INS_invalid,            INS_psllw,              INS_psllw,              INS_pslld,              INS_pslld,              INS_psllq,              INS_psllq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogical128BitLane,                 32,              2,     {INS_pslldq,            INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_pslldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogicalVariable,                    0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsllvd,            INS_vpsllvd,            INS_vpsllvq,            INS_vpsllvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2,            ShiftLeftLogicalVariable,                   -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsllvd,            INS_vpsllvd,            INS_vpsllvq,            INS_vpsllvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            ShiftRightArithmetic,                       32,              2,     {INS_invalid,           INS_invalid,            INS_psraw,              INS_invalid,            INS_psrad,              INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2,            ShiftRightArithmeticVariable,                0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsravd,            INS_vpsravd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2,            ShiftRightArithmeticVariable,               -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsravd,            INS_vpsravd,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            ShiftRightLogical,                          32,              2,     {INS_invalid,           INS_invalid,            INS_psrlw,              INS_psrlw,              INS_psrld,              INS_psrld,              INS_psrlq,              INS_psrlq,              INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2,            ShiftRightLogical128BitLane,                32,              2,     {INS_psrldq,            INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_psrldq,             INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2,            ShiftRightLogicalVariable,                   0,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsrlvd,            INS_vpsrlvd,            INS_vpsrlvq,            INS_vpsrlvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2,            ShiftRightLogicalVariable,                  -1,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_vpsrlvd,            INS_vpsrlvd,            INS_vpsrlvq,            INS_vpsrlvq,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            Shuffle,                                    32,              2,     {INS_pshufb,            INS_pshufb,             INS_invalid,            INS_invalid,            INS_pshufd,             INS_pshufd,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM)
 HARDWARE_INTRINSIC(AVX2,            ShuffleHigh,                                32,              2,     {INS_invalid,           INS_invalid,            INS_pshufhw,            INS_pshufhw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2,            ShuffleLow,                                 32,              2,     {INS_invalid,           INS_invalid,            INS_pshuflw,            INS_pshuflw,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
@@ -624,14 +624,14 @@ HARDWARE_INTRINSIC(BMI2_X64,        MultiplyNoFlags,
 //                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  FMA Intrinsics
-HARDWARE_INTRINSIC(FMA,             MultiplyAdd,                                 0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ps,        INS_vfmadd213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA,             MultiplyAddNegated,                          0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ps,       INS_vfnmadd213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(FMA,             MultiplyAdd,                                -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ps,        INS_vfmadd213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(FMA,             MultiplyAddNegated,                         -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ps,       INS_vfnmadd213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(FMA,             MultiplyAddNegatedScalar,                   16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmadd213ss,       INS_vfnmadd213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(FMA,             MultiplyAddScalar,                          16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmadd213ss,        INS_vfmadd213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(FMA,             MultiplyAddSubtract,                         0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmaddsub213ps,     INS_vfmaddsub213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA,             MultiplySubtract,                            0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ps,        INS_vfmsub213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA,             MultiplySubtractAdd,                         0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsubadd213ps,     INS_vfmsubadd213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(FMA,             MultiplySubtractNegated,                     0,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ps,       INS_vfnmsub213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(FMA,             MultiplyAddSubtract,                        -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmaddsub213ps,     INS_vfmaddsub213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(FMA,             MultiplySubtract,                           -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ps,        INS_vfmsub213pd},       HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(FMA,             MultiplySubtractAdd,                        -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsubadd213ps,     INS_vfmsubadd213pd},    HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(FMA,             MultiplySubtractNegated,                    -1,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ps,       INS_vfnmsub213pd},      HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(FMA,             MultiplySubtractScalar,                     16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfmsub213ss,        INS_vfmsub213sd},       HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(FMA,             MultiplySubtractNegatedScalar,              16,              3,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_vfnmsub213ss,       INS_vfnmsub213sd},      HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits)