Implement 64-bit-only intrinsic
author Fei Peng <fei.peng@intel.com>
Thu, 29 Nov 2018 00:25:14 +0000 (16:25 -0800)
committer Fei Peng <fei.peng@intel.com>
Sat, 1 Dec 2018 02:46:12 +0000 (18:46 -0800)
12 files changed:
src/jit/compiler.cpp
src/jit/compiler.h
src/jit/gentree.cpp
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/hwintrinsicxarch.h
src/jit/importer.cpp
src/jit/instr.h
src/jit/lowerxarch.cpp
src/jit/lsraxarch.cpp
src/vm/methodtablebuilder.cpp

index fe42bb0..a2be870 100644 (file)
@@ -2447,16 +2447,25 @@ void Compiler::compSetProcessor()
         if (configEnableISA(InstructionSet_SSE))
         {
             opts.setSupportedISA(InstructionSet_SSE);
+#ifdef _TARGET_AMD64_
+            opts.setSupportedISA(InstructionSet_SSE_X64);
+#endif
         }
         if (configEnableISA(InstructionSet_SSE2))
         {
             opts.setSupportedISA(InstructionSet_SSE2);
+#ifdef _TARGET_AMD64_
+            opts.setSupportedISA(InstructionSet_SSE2_X64);
+#endif
         }
         if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_LZCNT))
         {
             if (configEnableISA(InstructionSet_LZCNT))
             {
                 opts.setSupportedISA(InstructionSet_LZCNT);
+#ifdef _TARGET_AMD64_
+                opts.setSupportedISA(InstructionSet_LZCNT_X64);
+#endif
             }
         }
         if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_POPCNT))
@@ -2464,6 +2473,9 @@ void Compiler::compSetProcessor()
             if (configEnableISA(InstructionSet_POPCNT))
             {
                 opts.setSupportedISA(InstructionSet_POPCNT);
+#ifdef _TARGET_AMD64_
+                opts.setSupportedISA(InstructionSet_POPCNT_X64);
+#endif
             }
         }
 
@@ -2484,6 +2496,9 @@ void Compiler::compSetProcessor()
                 if (configEnableISA(InstructionSet_SSE41))
                 {
                     opts.setSupportedISA(InstructionSet_SSE41);
+#ifdef _TARGET_AMD64_
+                    opts.setSupportedISA(InstructionSet_SSE41_X64);
+#endif
                 }
             }
             if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE42))
@@ -2491,6 +2506,9 @@ void Compiler::compSetProcessor()
                 if (configEnableISA(InstructionSet_SSE42))
                 {
                     opts.setSupportedISA(InstructionSet_SSE42);
+#ifdef _TARGET_AMD64_
+                    opts.setSupportedISA(InstructionSet_SSE42_X64);
+#endif
                 }
             }
             if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSSE3))
index 986f0ab..a130962 100644 (file)
@@ -3473,7 +3473,6 @@ protected:
     GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass);
     GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType);
     GenTree* addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand);
-    bool hwIntrinsicSignatureTypeSupported(var_types retType, CORINFO_SIG_INFO* sig, NamedIntrinsic intrinsic);
 #endif // _TARGET_XARCH_
 #ifdef _TARGET_ARM64_
     InstructionSet lookupHWIntrinsicISA(const char* className);
index cf1de87..78ebf6c 100644 (file)
@@ -17379,6 +17379,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
         // TODO-XArch-Cleanup: Move this switch block to be table driven.
 
         case NI_SSE42_Crc32:
+        case NI_SSE42_X64_Crc32:
         case NI_FMA_MultiplyAdd:
         case NI_FMA_MultiplyAddNegated:
         case NI_FMA_MultiplyAddNegatedScalar:
index 919b4e8..320c9fb 100644 (file)
@@ -306,15 +306,19 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             genBaseIntrinsic(node);
             break;
         case InstructionSet_SSE:
+        case InstructionSet_SSE_X64:
             genSSEIntrinsic(node);
             break;
         case InstructionSet_SSE2:
+        case InstructionSet_SSE2_X64:
             genSSE2Intrinsic(node);
             break;
         case InstructionSet_SSE41:
+        case InstructionSet_SSE41_X64:
             genSSE41Intrinsic(node);
             break;
         case InstructionSet_SSE42:
+        case InstructionSet_SSE42_X64:
             genSSE42Intrinsic(node);
             break;
         case InstructionSet_AVX:
@@ -325,21 +329,25 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
             genAESIntrinsic(node);
             break;
         case InstructionSet_BMI1:
+        case InstructionSet_BMI1_X64:
             genBMI1Intrinsic(node);
             break;
         case InstructionSet_BMI2:
+        case InstructionSet_BMI2_X64:
             genBMI2Intrinsic(node);
             break;
         case InstructionSet_FMA:
             genFMAIntrinsic(node);
             break;
         case InstructionSet_LZCNT:
+        case InstructionSet_LZCNT_X64:
             genLZCNTIntrinsic(node);
             break;
         case InstructionSet_PCLMULQDQ:
             genPCLMULQDQIntrinsic(node);
             break;
         case InstructionSet_POPCNT:
+        case InstructionSet_POPCNT_X64:
             genPOPCNTIntrinsic(node);
             break;
         default:
@@ -1398,6 +1406,16 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
             break;
         }
 
+        case NI_SSE_X64_ConvertScalarToVector128Single:
+        {
+            assert(baseType == TYP_LONG);
+            assert(op1 != nullptr);
+            assert(op2 != nullptr);
+            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+            genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE);
+            break;
+        }
+
         case NI_SSE_MoveMask:
         {
             assert(baseType == TYP_FLOAT);
@@ -1566,19 +1584,18 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             break;
         }
 
-        case NI_SSE2_ConvertScalarToVector128Double:
-        case NI_SSE2_ConvertScalarToVector128Single:
+        case NI_SSE2_X64_ConvertScalarToVector128Double:
         {
-            assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
+            assert(baseType == TYP_LONG);
             assert(op1 != nullptr);
             assert(op2 != nullptr);
             instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
-            genHWIntrinsic_R_R_RM(node, ins, EA_ATTR(node->gtSIMDSize));
+            genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE);
             break;
         }
 
-        case NI_SSE2_ConvertScalarToVector128Int64:
-        case NI_SSE2_ConvertScalarToVector128UInt64:
+        case NI_SSE2_X64_ConvertScalarToVector128Int64:
+        case NI_SSE2_X64_ConvertScalarToVector128UInt64:
         {
             assert(baseType == TYP_LONG || baseType == TYP_ULONG);
             assert(op1 != nullptr);
@@ -1590,9 +1607,9 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
 
         case NI_SSE2_ConvertToInt32:
         case NI_SSE2_ConvertToInt32WithTruncation:
-        case NI_SSE2_ConvertToInt64:
         case NI_SSE2_ConvertToUInt32:
-        case NI_SSE2_ConvertToUInt64:
+        case NI_SSE2_X64_ConvertToUInt64:
+        case NI_SSE2_X64_ConvertToInt64:
         {
             assert(op2 == nullptr);
             instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
@@ -1639,6 +1656,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
         }
 
         case NI_SSE2_StoreNonTemporal:
+        case NI_SSE2_X64_StoreNonTemporal:
         {
             assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
             assert(op1 != nullptr);
@@ -1730,6 +1748,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
         }
 
         case NI_SSE41_Extract:
+        case NI_SSE41_X64_Extract:
         {
             regNumber   tmpTargetReg = REG_NA;
             instruction ins          = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
@@ -1804,6 +1823,7 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
     switch (intrinsicId)
     {
         case NI_SSE42_Crc32:
+        case NI_SSE42_X64_Crc32:
         {
             if (op1Reg != targetReg)
             {
@@ -2141,6 +2161,7 @@ void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node)
     switch (intrinsicId)
     {
         case NI_BMI1_AndNot:
+        case NI_BMI1_X64_AndNot:
         {
             assert(op2 != nullptr);
             assert(op1->TypeGet() == op2->TypeGet());
@@ -2152,6 +2173,9 @@ void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node)
         case NI_BMI1_ExtractLowestSetBit:
         case NI_BMI1_GetMaskUpToLowestSetBit:
         case NI_BMI1_ResetLowestSetBit:
+        case NI_BMI1_X64_ExtractLowestSetBit:
+        case NI_BMI1_X64_GetMaskUpToLowestSetBit:
+        case NI_BMI1_X64_ResetLowestSetBit:
         {
             assert(op2 == nullptr);
             assert((targetType == TYP_INT) || (targetType == TYP_LONG));
@@ -2160,6 +2184,7 @@ void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node)
         }
 
         case NI_BMI1_TrailingZeroCount:
+        case NI_BMI1_X64_TrailingZeroCount:
         {
             assert(op2 == nullptr);
             assert((targetType == TYP_INT) || (targetType == TYP_LONG));
@@ -2206,6 +2231,8 @@ void CodeGen::genBMI2Intrinsic(GenTreeHWIntrinsic* node)
     {
         case NI_BMI2_ParallelBitDeposit:
         case NI_BMI2_ParallelBitExtract:
+        case NI_BMI2_X64_ParallelBitDeposit:
+        case NI_BMI2_X64_ParallelBitExtract:
         {
             assert(op2 != nullptr);
             assert(op1->TypeGet() == op2->TypeGet());
@@ -2330,7 +2357,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
 //
 void CodeGen::genLZCNTIntrinsic(GenTreeHWIntrinsic* node)
 {
-    assert(node->gtHWIntrinsicId == NI_LZCNT_LeadingZeroCount);
+    assert(node->gtHWIntrinsicId == NI_LZCNT_LeadingZeroCount ||
+           node->gtHWIntrinsicId == NI_LZCNT_X64_LeadingZeroCount);
 
     genConsumeOperands(node);
     genXCNTIntrinsic(node, INS_lzcnt);
@@ -2356,7 +2384,7 @@ void CodeGen::genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node)
 //
 void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node)
 {
-    assert(node->gtHWIntrinsicId == NI_POPCNT_PopCount);
+    assert(node->gtHWIntrinsicId == NI_POPCNT_PopCount || node->gtHWIntrinsicId == NI_POPCNT_X64_PopCount);
 
     genConsumeOperands(node);
     genXCNTIntrinsic(node, INS_popcnt);
index 08fc125..75de24b 100644 (file)
@@ -99,10 +99,8 @@ HARDWARE_INTRINSIC(SSE_CompareOrderedScalar,                        "CompareOrde
 HARDWARE_INTRINSIC(SSE_CompareUnordered,                            "CompareUnordered",                             SSE,           3,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar,                      "CompareUnorderedScalar",                       SSE,           3,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_ConvertToInt32,                              "ConvertToInt32",                               SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtss2si,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64,                              "ConvertToInt64",                               SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtss2si,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single,              "ConvertScalarToVector128Single",               SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2ss,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SecondArgMaybe64Bit|HW_Flag_BaseTypeFromFirstArg|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_ConvertScalarToVector128Single,              "ConvertScalarToVector128Single",               SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2ss,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation,                "ConvertToInt32WithTruncation",                 SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttss2si,      INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation,                "ConvertToInt64WithTruncation",                 SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttss2si,      INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE_Divide,                                      "Divide",                                       SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divps,          INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_DivideScalar,                                "DivideScalar",                                 SSE,          -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divss,          INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_LoadAlignedVector128,                        "LoadAlignedVector128",                         SSE,          -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movaps,         INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
@@ -149,6 +147,16 @@ HARDWARE_INTRINSIC(SSE_Xor,                                         "Xor",
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
 //                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  SSE 64-bit-only Intrinsics
+HARDWARE_INTRINSIC(SSE_X64_IsSupported,                             "get_IsSupported",                              SSE_X64,      -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_X64_ConvertToInt64,                          "ConvertToInt64",                               SSE_X64,      -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtss2si,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_X64_ConvertToInt64WithTruncation,            "ConvertToInt64WithTruncation",                 SSE_X64,      -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttss2si,      INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_X64_ConvertScalarToVector128Single,          "ConvertScalarToVector128Single",               SSE_X64,      -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2ss,       INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
+//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE2 Intrinsics
 HARDWARE_INTRINSIC(SSE2_IsSupported,                                "get_IsSupported",                              SSE2,         -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_Add,                                        "Add",                                          SSE2,         -1,              16,           2,     {INS_paddb,             INS_paddb,          INS_paddw,          INS_paddw,          INS_paddd,          INS_paddd,          INS_paddq,          INS_paddq,          INS_invalid,        INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
@@ -195,20 +203,15 @@ HARDWARE_INTRINSIC(SSE2_CompareUnordered,                           "CompareUnor
 HARDWARE_INTRINSIC(SSE2_CompareUnorderedScalar,                     "CompareUnorderedScalar",                       SSE2,          3,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_ConvertToInt32,                             "ConvertToInt32",                               SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_ConvertToInt32WithTruncation,               "ConvertToInt32WithTruncation",                 SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt64,                             "ConvertToInt64",                               SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToInt64WithTruncation,               "ConvertToInt64WithTruncation",                 SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_ConvertToUInt32,                            "ConvertToUInt32",                              SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertToUInt64,                            "ConvertToUInt64",                              SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_ConvertToVector128Double,                   "ConvertToVector128Double",                     SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtdq2pd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtps2pd,       INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double,             "ConvertScalarToVector128Double",               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2sd,       INS_invalid,        INS_cvtsi2sd,       INS_invalid,        INS_cvtss2sd,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SecondArgMaybe64Bit|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Double,             "ConvertScalarToVector128Double",               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2sd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtss2sd,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32,                    "ConvertToVector128Int32",                      SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtps2dq,       INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int32,              "ConvertScalarToVector128Int32",                SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation,      "ConvertToVector128Int32WithTruncation",        SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttps2dq,      INS_cvttpd2dq},         HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Int64,              "ConvertScalarToVector128Int64",                SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single,                   "ConvertToVector128Single",                     SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtdq2ps,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single,             "ConvertScalarToVector128Single",               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsd2ss,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128Single,             "ConvertScalarToVector128Single",               SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsd2ss,       INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt32,             "ConvertScalarToVector128UInt32",               SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_ConvertScalarToVector128UInt64,             "ConvertScalarToVector128UInt64",               SSE2,         -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_64BitOnly|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(SSE2_Divide,                                     "Divide",                                       SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_DivideScalar,                               "DivideScalar",                                 SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_divsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2_Extract,                                    "Extract",                                      SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_pextrw,         INS_pextrw,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
@@ -251,7 +254,7 @@ HARDWARE_INTRINSIC(SSE2_StoreAligned,                               "StoreAligne
 HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal,                    "StoreAlignedNonTemporal",                      SSE2,         -1,              16,           2,     {INS_movntdq,           INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_movntdq,        INS_invalid,        INS_movntpd},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreHigh,                                  "StoreHigh",                                    SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movhpd},            HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_StoreLow,                                   "StoreLow",                                     SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movq,           INS_movq,           INS_invalid,        INS_movlpd},            HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_StoreNonTemporal,                           "StoreNonTemporal",                             SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_movnti,         INS_movnti,         INS_movnti,         INS_movnti,         INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SecondArgMaybe64Bit|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_StoreNonTemporal,                           "StoreNonTemporal",                             SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_movnti,         INS_movnti,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(SSE2_StoreScalar,                                "StoreScalar",                                  SSE2,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movsdsse2},         HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_Subtract,                                   "Subtract",                                     SSE2,         -1,              16,           2,     {INS_psubb,             INS_psubb,          INS_psubw,          INS_psubw,          INS_psubd,          INS_psubd,          INS_psubq,          INS_psubq,          INS_invalid,        INS_subpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_SubtractSaturate,                           "SubtractSaturate",                             SSE2,         -1,              16,           2,     {INS_psubsb,            INS_psubusb,        INS_psubsw,         INS_psubusw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -264,6 +267,20 @@ HARDWARE_INTRINSIC(SSE2_Xor,                                        "Xor",
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
 //                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  SSE2 64-bit-only Intrinsics
+HARDWARE_INTRINSIC(SSE2_X64_IsSupported,                            "get_IsSupported",                              SSE2_X64,     -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_X64_ConvertToInt64,                         "ConvertToInt64",                               SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64_ConvertToInt64WithTruncation,           "ConvertToInt64WithTruncation",                 SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64_ConvertToUInt64,                        "ConvertToUInt64",                              SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64_ConvertScalarToVector128Double,         "ConvertScalarToVector128Double",               SSE2_X64,     -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cvtsi2sd,       INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(SSE2_X64_ConvertScalarToVector128Int64,          "ConvertScalarToVector128Int64",                SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_X64_ConvertScalarToVector128UInt64,         "ConvertScalarToVector128UInt64",               SSE2_X64,     -1,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_i2xmm,      INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_X64_StoreNonTemporal,                       "StoreNonTemporal",                             SSE2_X64,     -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movnti,         INS_movnti,         INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
+//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE3 Intrinsics
 HARDWARE_INTRINSIC(SSE3_IsSupported,                                "get_IsSupported",                              SSE3,         -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE3_AddSubtract,                                "AddSubtract",                                  SSE3,         -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addsubps,       INS_addsubpd},          HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -307,10 +324,10 @@ HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16,                   "ConvertToVe
 HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32,                   "ConvertToVector128Int32",                      SSE41,        -1,              16,           1,     {INS_pmovsxbd,          INS_pmovzxbd,       INS_pmovsxwd,       INS_pmovzxwd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64,                   "ConvertToVector128Int64",                      SSE41,        -1,              16,           1,     {INS_pmovsxbq,          INS_pmovzxbq,       INS_pmovsxwq,       INS_pmovzxwq,       INS_pmovsxdq,       INS_pmovzxdq,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_DotProduct,                                "DotProduct",                                   SSE41,        -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_dpps,           INS_dppd},              HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(SSE41_Extract,                                   "Extract",                                      SSE41,        -1,              16,           2,     {INS_pextrb,            INS_pextrb,         INS_invalid,        INS_invalid,        INS_pextrd,         INS_pextrd,         INS_pextrq,         INS_pextrq,         INS_extractps,      INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_Extract,                                   "Extract",                                      SSE41,        -1,              16,           2,     {INS_pextrb,            INS_pextrb,         INS_invalid,        INS_invalid,        INS_pextrd,         INS_pextrd,         INS_invalid,        INS_invalid,        INS_extractps,      INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_Floor,                                     "Floor",                                        SSE41,         9,              16,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundps,        INS_roundpd},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_FloorScalar,                               "FloorScalar",                                  SSE41,         9,              16,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_roundss,        INS_roundsd},           HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE41_Insert,                                    "Insert",                                       SSE41,        -1,              16,           3,     {INS_pinsrb,            INS_pinsrb,         INS_invalid,        INS_invalid,        INS_pinsrd,         INS_pinsrd,         INS_pinsrq,         INS_pinsrq,         INS_insertps,       INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_SecondArgMaybe64Bit)
+HARDWARE_INTRINSIC(SSE41_Insert,                                    "Insert",                                       SSE41,        -1,              16,           3,     {INS_pinsrb,            INS_pinsrb,         INS_invalid,        INS_invalid,        INS_pinsrd,         INS_pinsrd,         INS_invalid,        INS_invalid,        INS_insertps,       INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(SSE41_LoadAlignedVector128NonTemporal,           "LoadAlignedVector128NonTemporal",              SSE41,        -1,              16,           1,     {INS_movntdqa,          INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE41_Max,                                       "Max",                                          SSE41,        -1,              16,           2,     {INS_pmaxsb,            INS_invalid,        INS_invalid,        INS_pmaxuw,         INS_pmaxsd,         INS_pmaxud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE41_Min,                                       "Min",                                          SSE41,        -1,              16,           2,     {INS_pminsb,            INS_invalid,        INS_invalid,        INS_pminuw,         INS_pminsd,         INS_pminud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
@@ -340,15 +357,32 @@ HARDWARE_INTRINSIC(SSE41_TestZ,                                     "TestZ",
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
 //                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  SSE41 64-bit-only Intrinsics
+HARDWARE_INTRINSIC(SSE41_X64_IsSupported,                           "get_IsSupported",                              SSE41_X64,    -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE41_X64_Extract,                               "Extract",                                      SSE41_X64,    -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pextrq,         INS_pextrq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_X64_Insert,                                "Insert",                                       SSE41_X64,    -1,              16,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pinsrq,         INS_pinsrq,         INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
+//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE42 Intrinsics
 HARDWARE_INTRINSIC(SSE42_IsSupported,                               "get_IsSupported",                              SSE42,        -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE42_Crc32,                                     "Crc32",                                        SSE42,        -1,               0,           2,     {INS_invalid,           INS_crc32,          INS_invalid,        INS_crc32,          INS_invalid,        INS_crc32,          INS_invalid,        INS_crc32,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
+HARDWARE_INTRINSIC(SSE42_Crc32,                                     "Crc32",                                        SSE42,        -1,               0,           2,     {INS_invalid,           INS_crc32,          INS_invalid,        INS_crc32,          INS_invalid,        INS_crc32,          INS_invalid,        INS_invalid,         INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
 HARDWARE_INTRINSIC(SSE42_CompareGreaterThan,                        "CompareGreaterThan",                           SSE42,        -1,              16,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pcmpgtq,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
 //                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  SSE42 64-bit-only Intrinsics
+HARDWARE_INTRINSIC(SSE42_X64_IsSupported,                           "get_IsSupported",                              SSE42_X64,    -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE42_X64_Crc32,                                 "Crc32",                                        SSE42_X64,    -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_crc32,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
+//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  AVX Intrinsics
 HARDWARE_INTRINSIC(AVX_IsSupported,                                 "get_IsSupported",                              AVX,          -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX_Add,                                         "Add",                                          AVX,          -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_addps,          INS_addpd},             HW_Category_SimpleSIMD,             HW_Flag_Commutative)
@@ -506,11 +540,23 @@ HARDWARE_INTRINSIC(AES_KeygenAssist,                                "KeygenAssis
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  BMI1 Intrinsics
 HARDWARE_INTRINSIC(BMI1_IsSupported,                                "get_IsSupported",                              BMI1,         -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(BMI1_AndNot,                                     "AndNot",                                       BMI1,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_andn,           INS_andn,           INS_andn,           INS_andn,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_ExtractLowestSetBit,                        "ExtractLowestSetBit",                          BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsi,           INS_blsi,           INS_blsi,           INS_blsi,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_GetMaskUpToLowestSetBit,                    "GetMaskUpToLowestSetBit",                      BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsmsk,         INS_blsmsk,         INS_blsmsk,         INS_blsmsk,         INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_ResetLowestSetBit,                          "ResetLowestSetBit",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsr,           INS_blsr,           INS_blsr,           INS_blsr,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_TrailingZeroCount,                          "TrailingZeroCount",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(BMI1_AndNot,                                     "AndNot",                                       BMI1,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_andn,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_ExtractLowestSetBit,                        "ExtractLowestSetBit",                          BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsi,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_GetMaskUpToLowestSetBit,                    "GetMaskUpToLowestSetBit",                      BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsmsk,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_ResetLowestSetBit,                          "ResetLowestSetBit",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsr,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_TrailingZeroCount,                          "TrailingZeroCount",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
+//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  BMI1 64-bit-only Intrinsics
+HARDWARE_INTRINSIC(BMI1_X64_IsSupported,                            "get_IsSupported",                              BMI1_X64,     -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(BMI1_X64_AndNot,                                 "AndNot",                                       BMI1_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_andn,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_X64_ExtractLowestSetBit,                    "ExtractLowestSetBit",                          BMI1_X64,     -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blsi,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_X64_GetMaskUpToLowestSetBit,                "GetMaskUpToLowestSetBit",                      BMI1_X64,     -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blsmsk,         INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_X64_ResetLowestSetBit,                      "ResetLowestSetBit",                            BMI1_X64,     -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_blsr,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_X64_TrailingZeroCount,                      "TrailingZeroCount",                            BMI1_X64,     -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
@@ -518,8 +564,17 @@ HARDWARE_INTRINSIC(BMI1_TrailingZeroCount,                          "TrailingZer
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  BMI2 Intrinsics
 HARDWARE_INTRINSIC(BMI2_IsSupported,                                "get_IsSupported",                              BMI2,         -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(BMI2_ParallelBitDeposit,                         "ParallelBitDeposit",                           BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pdep,           INS_pdep,           INS_pdep,           INS_pdep,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI2_ParallelBitExtract,                         "ParallelBitExtract",                           BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pext,           INS_pext,           INS_pext,           INS_pext,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2_ParallelBitDeposit,                         "ParallelBitDeposit",                           BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pdep,           INS_invalid,        INS_invalid,        INS_invalid,           INS_invalid,     INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2_ParallelBitExtract,                         "ParallelBitExtract",                           BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pext,           INS_invalid,        INS_invalid,        INS_invalid,           INS_invalid,     INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
+//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  BMI2 64-bit-only Intrinsics
+HARDWARE_INTRINSIC(BMI2_X64_IsSupported,                            "get_IsSupported",                              BMI2_X64,     -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(BMI2_X64_ParallelBitDeposit,                     "ParallelBitDeposit",                           BMI2_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pdep,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2_X64_ParallelBitExtract,                     "ParallelBitExtract",                           BMI2_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pext,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
@@ -544,7 +599,15 @@ HARDWARE_INTRINSIC(FMA_MultiplySubtractNegatedScalar,               "MultiplySub
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  LZCNT Intrinsics
 HARDWARE_INTRINSIC(LZCNT_IsSupported,                               "get_IsSupported",                              LZCNT,        -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount,                          "LeadingZeroCount",                             LZCNT,        -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_lzcnt,          INS_invalid,        INS_lzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount,                          "LeadingZeroCount",                             LZCNT,        -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_lzcnt,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
+//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  LZCNT 64-bit-only Intrinsics
+HARDWARE_INTRINSIC(LZCNT_X64_IsSupported,                           "get_IsSupported",                              LZCNT_X64,    -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(LZCNT_X64_LeadingZeroCount,                      "LeadingZeroCount",                             LZCNT_X64,    -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_lzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
@@ -560,7 +623,16 @@ HARDWARE_INTRINSIC(PCLMULQDQ_CarrylessMultiply,                     "CarrylessMu
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  POPCNT Intrinsics
 HARDWARE_INTRINSIC(POPCNT_IsSupported,                              "get_IsSupported",                              POPCNT,       -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(POPCNT_PopCount,                                 "PopCount",                                     POPCNT,       -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_popcnt,         INS_invalid,        INS_popcnt,         INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(POPCNT_PopCount,                                 "PopCount",                                     POPCNT,       -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_popcnt,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
+//                                                                                                                                                                      {TYP_BYTE,              TYP_UBYTE,          TYP_SHORT,          TYP_USHORT,         TYP_INT,            TYP_UINT,           TYP_LONG,           TYP_ULONG,          TYP_FLOAT,          TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  POPCNT 64-bit-only Intrinsics
+HARDWARE_INTRINSIC(POPCNT_X64_IsSupported,                          "get_IsSupported",                              POPCNT_X64,   -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(POPCNT_X64_PopCount,                             "PopCount",                                     POPCNT_X64,   -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_popcnt,         INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
+
 #endif // FEATURE_HW_INTRINSIC
 
 #undef HARDWARE_INTRINSIC
index 7a7c689..8cd4508 100644 (file)
@@ -39,14 +39,15 @@ const HWIntrinsicInfo& HWIntrinsicInfo::lookup(NamedIntrinsic id)
 // Arguments:
 //    className  -- The name of the class associated with the HWIntrinsic to lookup
 //    methodName -- The name of the method associated with the HWIntrinsic to lookup
+//    enclosingClassName -- The name of the class enclosing className; forwarded to lookupIsa to resolve "X64" classes
 //
 // Return Value:
 //    The NamedIntrinsic associated with methodName and isa
-NamedIntrinsic HWIntrinsicInfo::lookupId(const char* className, const char* methodName)
+NamedIntrinsic HWIntrinsicInfo::lookupId(const char* className, const char* methodName, const char* enclosingClassName)
 {
     // TODO-Throughput: replace sequential search by binary search
 
-    InstructionSet isa = lookupIsa(className);
+    InstructionSet isa = lookupIsa(className, enclosingClassName);
     assert(isa != InstructionSet_ILLEGAL);
 
     assert(methodName != nullptr);
@@ -70,17 +71,50 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(const char* className, const char* meth
 }
 
 //------------------------------------------------------------------------
-// lookupIsa: Gets the InstructionSet for a given class name
+// X64VersionOfIsa: Maps a base InstructionSet to its corresponding 64-bit only (_X64) InstructionSet
+//
+// Arguments:
+//    isa -- The base InstructionSet ID; one of SSE, SSE2, SSE41, SSE42, BMI1, BMI2, LZCNT or POPCNT
+//
+// Return Value:
+//    The InstructionSet_*_X64 value associated with isa; any other isa reaches unreached()
+static InstructionSet X64VersionOfIsa(InstructionSet isa)
+{
+    switch (isa)
+    {
+        case InstructionSet_SSE:
+            return InstructionSet_SSE_X64;
+        case InstructionSet_SSE2:
+            return InstructionSet_SSE2_X64;
+        case InstructionSet_SSE41:
+            return InstructionSet_SSE41_X64;
+        case InstructionSet_SSE42:
+            return InstructionSet_SSE42_X64;
+        case InstructionSet_BMI1:
+            return InstructionSet_BMI1_X64;
+        case InstructionSet_BMI2:
+            return InstructionSet_BMI2_X64;
+        case InstructionSet_LZCNT:
+            return InstructionSet_LZCNT_X64;
+        case InstructionSet_POPCNT:
+            return InstructionSet_POPCNT_X64;
+        default: // no _X64 counterpart is mapped here for any other ISA
+            unreached();
+            return InstructionSet_ILLEGAL; // NOTE(review): presumably only reachable if unreached() returns -- confirm
+    }
+}
+
+//------------------------------------------------------------------------
+// lookupInstructionSet: Gets the InstructionSet for a given class name
 //
 // Arguments:
 //    className -- The name of the class associated with the InstructionSet to lookup
 //
 // Return Value:
 //    The InstructionSet associated with className
-InstructionSet HWIntrinsicInfo::lookupIsa(const char* className)
+static InstructionSet lookupInstructionSet(const char* className)
 {
     assert(className != nullptr);
-
     if (className[0] == 'A')
     {
         if (strcmp(className, "Aes") == 0)
@@ -159,6 +193,30 @@ InstructionSet HWIntrinsicInfo::lookupIsa(const char* className)
 }
 
 //------------------------------------------------------------------------
+// lookupIsa: Gets the InstructionSet for a given class name and enclosing class name
+//
+// Arguments:
+//    className -- The name of the class associated with the InstructionSet to lookup; must not be null
+//    enclosingClassName -- The name of the class enclosing className; only consulted (and asserted non-null) when className is "X64"
+//
+// Return Value:
+//    The _X64 InstructionSet of enclosingClassName when className is "X64"; otherwise the InstructionSet of className
+InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, const char* enclosingClassName)
+{
+    assert(className != nullptr);
+
+    if (strcmp(className, "X64") == 0) // "X64" alone does not identify an ISA; the enclosing class does
+    {
+        assert(enclosingClassName != nullptr);
+        return X64VersionOfIsa(lookupInstructionSet(enclosingClassName)); // resolve the enclosing ISA, then map it to its _X64 variant
+    }
+    else
+    {
+        return lookupInstructionSet(className); // any other class name is looked up directly; enclosingClassName is ignored
+    }
+}
+
+//------------------------------------------------------------------------
 // lookupSimdSize: Gets the SimdSize for a given HWIntrinsic and signature
 //
 // Arguments:
@@ -442,27 +500,30 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
 {
     switch (isa)
     {
-        // These ISAs are partially implemented
-        case InstructionSet_BMI1:
-        case InstructionSet_BMI2:
-        case InstructionSet_SSE42:
-        {
-            return true;
-        }
-
         // These ISAs are fully implemented
         case InstructionSet_AES:
         case InstructionSet_AVX:
         case InstructionSet_AVX2:
+        case InstructionSet_BMI1:
+        case InstructionSet_BMI2:
+        case InstructionSet_BMI1_X64:
+        case InstructionSet_BMI2_X64:
         case InstructionSet_FMA:
         case InstructionSet_LZCNT:
+        case InstructionSet_LZCNT_X64:
         case InstructionSet_PCLMULQDQ:
         case InstructionSet_POPCNT:
+        case InstructionSet_POPCNT_X64:
         case InstructionSet_SSE:
+        case InstructionSet_SSE_X64:
         case InstructionSet_SSE2:
+        case InstructionSet_SSE2_X64:
         case InstructionSet_SSE3:
         case InstructionSet_SSSE3:
         case InstructionSet_SSE41:
+        case InstructionSet_SSE41_X64:
+        case InstructionSet_SSE42:
+        case InstructionSet_SSE42_X64:
         {
             return true;
         }
@@ -488,8 +549,12 @@ bool HWIntrinsicInfo::isScalarIsa(InstructionSet isa)
     {
         case InstructionSet_BMI1:
         case InstructionSet_BMI2:
+        case InstructionSet_BMI1_X64:
+        case InstructionSet_BMI2_X64:
         case InstructionSet_LZCNT:
+        case InstructionSet_LZCNT_X64:
         case InstructionSet_POPCNT:
+        case InstructionSet_POPCNT_X64:
         {
             return true;
         }
@@ -635,41 +700,6 @@ bool Compiler::compSupportsHWIntrinsic(InstructionSet isa)
 }
 
 //------------------------------------------------------------------------
-// hwIntrinsicSignatureTypeSupported: platform support of hardware intrinsics
-//
-// Arguments:
-//    retType - return type
-//    sig     - intrinsic signature
-//
-// Return Value:
-//    Returns true iff the given type signature is supported
-// Notes:
-//    - This is only used on 32-bit systems to determine whether the signature uses no 64-bit registers.
-//    - The `retType` is passed to avoid another call to the type system, as it has already been retrieved.
-bool Compiler::hwIntrinsicSignatureTypeSupported(var_types retType, CORINFO_SIG_INFO* sig, NamedIntrinsic intrinsic)
-{
-#ifdef _TARGET_X86_
-    CORINFO_CLASS_HANDLE argClass;
-
-    if (HWIntrinsicInfo::Is64BitOnly(intrinsic))
-    {
-        return false;
-    }
-    else if (HWIntrinsicInfo::SecondArgMaybe64Bit(intrinsic))
-    {
-        assert(sig->numArgs >= 2);
-        CorInfoType corType =
-            strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(sig->args), &argClass));
-        return !varTypeIsLong(JITtype2varType(corType));
-    }
-
-    return !varTypeIsLong(retType);
-#else
-    return true;
-#endif
-}
-
-//------------------------------------------------------------------------
 // impIsTableDrivenHWIntrinsic:
 //
 // Arguments:
@@ -718,10 +748,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
     // This intrinsic is supported if
     // - the ISA is available on the underlying hardware (compSupports returns true)
     // - the compiler supports this hardware intrinsics (compSupportsHWIntrinsic returns true)
-    // - intrinsics do not require 64-bit registers (r64) on 32-bit platforms (signatureTypeSupproted returns
-    // true)
-    bool issupported =
-        compSupports(isa) && compSupportsHWIntrinsic(isa) && hwIntrinsicSignatureTypeSupported(retType, sig, intrinsic);
+    bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa);
 
     if (category == HW_Category_IsSupportedProperty)
     {
@@ -904,6 +931,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
         case InstructionSet_SSE2:
             return impSSE2Intrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_SSE42:
+        case InstructionSet_SSE42_X64:
             return impSSE42Intrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_AVX:
         case InstructionSet_AVX2:
@@ -912,16 +940,20 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
         case InstructionSet_AES:
             return impAESIntrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_BMI1:
+        case InstructionSet_BMI1_X64:
             return impBMI1Intrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_BMI2:
+        case InstructionSet_BMI2_X64:
             return impBMI2Intrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_FMA:
             return impFMAIntrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_LZCNT:
+        case InstructionSet_LZCNT_X64:
             return impLZCNTIntrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_PCLMULQDQ:
             return impPCLMULQDQIntrinsic(intrinsic, method, sig, mustExpand);
         case InstructionSet_POPCNT:
+        case InstructionSet_POPCNT_X64:
             return impPOPCNTIntrinsic(intrinsic, method, sig, mustExpand);
         default:
             return nullptr;
@@ -1073,13 +1105,14 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic        intrinsic,
     switch (intrinsic)
     {
         case NI_SSE42_Crc32:
+        case NI_SSE42_X64_Crc32:
             assert(sig->numArgs == 2);
             op2     = impPopStack().val;
             op1     = impPopStack().val;
             argList = info.compCompHnd->getArgNext(argList);                        // the second argument
             corType = strip(info.compCompHnd->getArgType(sig, argList, &argClass)); // type of the second argument
 
-            retNode = gtNewScalarHWIntrinsicNode(callType, op1, op2, NI_SSE42_Crc32);
+            retNode = gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
 
             // TODO - currently we use the BaseType to bring the type of the second argument
             // to the code generator. May encode the overload info in other way.
@@ -1215,6 +1248,7 @@ GenTree* Compiler::impBMI1Intrinsic(NamedIntrinsic        intrinsic,
     switch (intrinsic)
     {
         case NI_BMI1_AndNot:
+        case NI_BMI1_X64_AndNot:
         {
             assert(sig->numArgs == 2);
 
@@ -1228,6 +1262,10 @@ GenTree* Compiler::impBMI1Intrinsic(NamedIntrinsic        intrinsic,
         case NI_BMI1_GetMaskUpToLowestSetBit:
         case NI_BMI1_ResetLowestSetBit:
         case NI_BMI1_TrailingZeroCount:
+        case NI_BMI1_X64_ExtractLowestSetBit:
+        case NI_BMI1_X64_GetMaskUpToLowestSetBit:
+        case NI_BMI1_X64_ResetLowestSetBit:
+        case NI_BMI1_X64_TrailingZeroCount:
         {
             assert(sig->numArgs == 1);
             GenTree* op1 = impPopStack().val;
@@ -1253,6 +1291,8 @@ GenTree* Compiler::impBMI2Intrinsic(NamedIntrinsic        intrinsic,
     {
         case NI_BMI2_ParallelBitDeposit:
         case NI_BMI2_ParallelBitExtract:
+        case NI_BMI2_X64_ParallelBitDeposit:
+        case NI_BMI2_X64_ParallelBitExtract:
         {
             assert(sig->numArgs == 2);
 
@@ -1285,7 +1325,7 @@ GenTree* Compiler::impLZCNTIntrinsic(NamedIntrinsic        intrinsic,
 {
     assert(sig->numArgs == 1);
     var_types callType = JITtype2varType(sig->retType);
-    return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_LZCNT_LeadingZeroCount);
+    return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, intrinsic);
 }
 
 GenTree* Compiler::impPCLMULQDQIntrinsic(NamedIntrinsic        intrinsic,
@@ -1303,7 +1343,7 @@ GenTree* Compiler::impPOPCNTIntrinsic(NamedIntrinsic        intrinsic,
 {
     assert(sig->numArgs == 1);
     var_types callType = JITtype2varType(sig->retType);
-    return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_POPCNT_PopCount);
+    return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, intrinsic);
 }
 
 #endif // FEATURE_HW_INTRINSICS
index 19d2d22..a5aefef 100644 (file)
@@ -63,65 +63,60 @@ enum HWIntrinsicFlag : unsigned int
 
     // NoCodeGen
     // - should be transformed in the compiler front-end, cannot reach CodeGen
-    HW_Flag_NoCodeGen = 0x10,
+    HW_Flag_NoCodeGen = 0x8,
 
     // Unfixed SIMD-size
     // - overloaded on multiple vector sizes (SIMD size in the table is unreliable)
-    HW_Flag_UnfixedSIMDSize = 0x20,
+    HW_Flag_UnfixedSIMDSize = 0x10,
 
     // Multi-instruction
     // - that one intrinsic can generate multiple instructions
-    HW_Flag_MultiIns = 0x80,
+    HW_Flag_MultiIns = 0x20,
 
     // NoContainment
     // the intrinsic cannot be handled by comtainment,
     // all the intrinsic that have explicit memory load/store semantics should have this flag
-    HW_Flag_NoContainment = 0x100,
+    HW_Flag_NoContainment = 0x40,
 
     // Copy Upper bits
     // some SIMD scalar intrinsics need the semantics of copying upper bits from the source operand
-    HW_Flag_CopyUpperBits = 0x200,
+    HW_Flag_CopyUpperBits = 0x80,
 
     // Select base type using the first argument type
-    HW_Flag_BaseTypeFromFirstArg = 0x400,
+    HW_Flag_BaseTypeFromFirstArg = 0x100,
 
     // Indicates compFloatingPointUsed does not need to be set.
-    HW_Flag_NoFloatingPointUsed = 0x800,
+    HW_Flag_NoFloatingPointUsed = 0x200,
 
     // Maybe IMM
     // the intrinsic has either imm or Vector overloads
-    HW_Flag_MaybeIMM = 0x1000,
+    HW_Flag_MaybeIMM = 0x400,
 
     // NoJmpTable IMM
     // the imm intrinsic does not need jumptable fallback when it gets non-const argument
-    HW_Flag_NoJmpTableIMM = 0x2000,
-
-    // 64-bit intrinsics
-    // Intrinsics that operate over 64-bit general purpose registers are not supported on 32-bit platform
-    HW_Flag_64BitOnly           = 0x4000,
-    HW_Flag_SecondArgMaybe64Bit = 0x8000,
+    HW_Flag_NoJmpTableIMM = 0x800,
 
     // Select base type using the second argument type
-    HW_Flag_BaseTypeFromSecondArg = 0x10000,
+    HW_Flag_BaseTypeFromSecondArg = 0x1000,
 
     // Special codegen
     // the intrinsics need special rules in CodeGen,
     // but may be table-driven in the front-end
-    HW_Flag_SpecialCodeGen = 0x20000,
+    HW_Flag_SpecialCodeGen = 0x2000,
 
     // No Read/Modify/Write Semantics
     // the intrinsic doesn't have read/modify/write semantics in two/three-operand form.
-    HW_Flag_NoRMWSemantics = 0x40000,
+    HW_Flag_NoRMWSemantics = 0x4000,
 
     // Special import
     // the intrinsics need special rules in importer,
     // but may be table-driven in the back-end
-    HW_Flag_SpecialImport = 0x80000,
+    HW_Flag_SpecialImport = 0x8000,
 
     // Maybe Memory Load/Store
     // - some intrinsics may have pointer overloads but without HW_Category_MemoryLoad/HW_Category_MemoryStore
-    HW_Flag_MaybeMemoryLoad  = 0x100000,
-    HW_Flag_MaybeMemoryStore = 0x200000,
+    HW_Flag_MaybeMemoryLoad  = 0x10000,
+    HW_Flag_MaybeMemoryStore = 0x20000,
 };
 
 struct HWIntrinsicInfo
@@ -138,8 +133,8 @@ struct HWIntrinsicInfo
 
     static const HWIntrinsicInfo& lookup(NamedIntrinsic id);
 
-    static NamedIntrinsic lookupId(const char* className, const char* methodName);
-    static InstructionSet lookupIsa(const char* className);
+    static NamedIntrinsic lookupId(const char* className, const char* methodName, const char* enclosingClassName);
+    static InstructionSet lookupIsa(const char* className, const char* enclosingClassName);
 
     static unsigned lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig);
 
@@ -288,18 +283,6 @@ struct HWIntrinsicInfo
         return (flags & HW_Flag_NoJmpTableIMM) != 0;
     }
 
-    static bool Is64BitOnly(NamedIntrinsic id)
-    {
-        HWIntrinsicFlag flags = lookupFlags(id);
-        return (flags & HW_Flag_64BitOnly) != 0;
-    }
-
-    static bool SecondArgMaybe64Bit(NamedIntrinsic id)
-    {
-        HWIntrinsicFlag flags = lookupFlags(id);
-        return (flags & HW_Flag_SecondArgMaybe64Bit) != 0;
-    }
-
     static bool BaseTypeFromSecondArg(NamedIntrinsic id)
     {
         HWIntrinsicFlag flags = lookupFlags(id);
index e8ccdc2..eabc3bc 100644 (file)
@@ -4314,9 +4314,11 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
 {
     NamedIntrinsic result = NI_Illegal;
 
-    const char* className     = nullptr;
-    const char* namespaceName = nullptr;
-    const char* methodName    = info.compCompHnd->getMethodNameFromMetadata(method, &className, &namespaceName);
+    const char* className          = nullptr;
+    const char* namespaceName      = nullptr;
+    const char* enclosingClassName = nullptr;
+    const char* methodName =
+        info.compCompHnd->getMethodNameFromMetadata(method, &className, &namespaceName, &enclosingClassName);
 
     if ((namespaceName == nullptr) || (className == nullptr) || (methodName == nullptr))
     {
@@ -4544,7 +4546,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
 #if defined(_TARGET_XARCH_)
         else if (strcmp(namespaceName, ".X86") == 0)
         {
-            result = HWIntrinsicInfo::lookupId(className, methodName);
+            result = HWIntrinsicInfo::lookupId(className, methodName, enclosingClassName);
         }
 #elif defined(_TARGET_ARM64_)
         else if (strcmp(namespaceName, ".Arm.Arm64") == 0)
index 058bec1..a7749d8 100644 (file)
@@ -315,6 +315,14 @@ enum InstructionSet
     InstructionSet_LZCNT,
     InstructionSet_PCLMULQDQ,
     InstructionSet_POPCNT,
+    InstructionSet_BMI1_X64,
+    InstructionSet_BMI2_X64,
+    InstructionSet_LZCNT_X64,
+    InstructionSet_POPCNT_X64,
+    InstructionSet_SSE_X64,
+    InstructionSet_SSE2_X64,
+    InstructionSet_SSE41_X64,
+    InstructionSet_SSE42_X64,
 #elif defined(_TARGET_ARM_)
     InstructionSet_NEON,
 #elif defined(_TARGET_ARM64_)
index 4dda60e..72f5f95 100644 (file)
@@ -2512,6 +2512,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
                 }
 
                 case NI_SSE41_Insert:
+                case NI_SSE41_X64_Insert:
                 {
                     if (containingNode->gtSIMDBaseType == TYP_FLOAT)
                     {
@@ -2743,9 +2744,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                     }
 
                     case NI_SSE2_ConvertToInt32:
-                    case NI_SSE2_ConvertToInt64:
+                    case NI_SSE2_X64_ConvertToInt64:
                     case NI_SSE2_ConvertToUInt32:
-                    case NI_SSE2_ConvertToUInt64:
+                    case NI_SSE2_X64_ConvertToUInt64:
                     case NI_AVX2_ConvertToInt32:
                     case NI_AVX2_ConvertToUInt32:
                     {
@@ -3083,6 +3084,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                         case NI_SSE41_Blend:
                         case NI_SSE41_DotProduct:
                         case NI_SSE41_Insert:
+                        case NI_SSE41_X64_Insert:
                         case NI_SSE41_MultipleSumAbsoluteDifferences:
                         case NI_AVX_Blend:
                         case NI_AVX_Compare:
index a28ea02..c5cc71e 100644 (file)
@@ -2441,6 +2441,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
 
 #ifdef _TARGET_X86_
             case NI_SSE42_Crc32:
+            case NI_SSE42_X64_Crc32:
             {
                 // TODO-XArch-Cleanup: Currently we use the BaseType to bring the type of the second argument
                 // to the code generator. We may want to encode the overload info in another way.
index 7c52ebc..3e30f38 100644 (file)
@@ -1501,7 +1501,12 @@ MethodTableBuilder::BuildMethodTableThrowing(
         if (hr == S_OK && strcmp(nameSpace, "System.Runtime.Intrinsics.Arm.Arm64") == 0)
 #else
         // All the funtions in System.Runtime.Intrinsics.X86 are hardware intrinsics.
-        if (hr == S_OK && strcmp(nameSpace, "System.Runtime.Intrinsics.X86") == 0)
+        if (bmtInternal->pType->IsNested())
+        {
+            IfFailThrow(GetMDImport()->GetNameOfTypeDef(bmtInternal->pType->GetEnclosingTypeToken(), NULL, &nameSpace));
+        }
+        
+        if (hr == S_OK && (strcmp(nameSpace, "System.Runtime.Intrinsics.X86") == 0))
 #endif
         {
             if (IsCompilationProcess())