Enable Vector128/256<T> and Add intrinsics
authorFei Peng <fei.peng@intel.com>
Wed, 13 Dec 2017 04:46:49 +0000 (20:46 -0800)
committerFei Peng <fei.peng@intel.com>
Wed, 13 Dec 2017 04:46:49 +0000 (20:46 -0800)
20 files changed:
src/inc/corinfo.h
src/jit/codegenlinear.h
src/jit/compiler.cpp
src/jit/compiler.h
src/jit/emitxarch.cpp
src/jit/emitxarch.h
src/jit/gentree.cpp
src/jit/gentree.h
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/importer.cpp
src/jit/lowerxarch.cpp
src/jit/morph.cpp
src/jit/simd.cpp
src/vm/jitinterface.cpp
src/vm/jitinterface.h
tests/src/JIT/HardwareIntrinsics/Add.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/Add_r.csproj [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/Add_ro.csproj [new file with mode: 0644]

index 287f44f..b32e187 100644 (file)
@@ -213,13 +213,14 @@ TODO: Talk about initializing strutures before use
     #define SELECTANY extern __declspec(selectany)
 #endif
 
-SELECTANY const GUID JITEEVersionIdentifier = { /* 01c3d216-a404-4290-8278-ac27a4793d31 */
-    0x01c3d216,
-    0xa404,
-    0x4290,
-    {0x82, 0x78, 0xac, 0x27, 0xa4, 0x79, 0x3d, 0x31}
+SELECTANY const GUID JITEEVersionIdentifier = { /* 19258069-1777-4691-87DF-DADF8F352875 */
+    0x19258069, 
+    0x1777, 
+    0x4691, 
+    { 0x87, 0xdf, 0xda, 0xdf, 0x8f, 0x35, 0x28, 0x75 } 
 };
 
+
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
 //
 // END JITEEVersionIdentifier
@@ -2508,6 +2509,12 @@ public:
             CORINFO_CLASS_HANDLE        cls
             ) = 0;
 
+    // "System.Int32" ==> CORINFO_TYPE_INT..
+    // "System.UInt32" ==> CORINFO_TYPE_UINT..
+    virtual CorInfoType getTypeForPrimitiveNumericClass(
+            CORINFO_CLASS_HANDLE        cls
+            ) = 0;
+
     // TRUE if child is a subtype of parent
     // if parent is an interface, then does child implement / extend parent
     virtual BOOL canCast(
index ec3252f..5804fa8 100644 (file)
@@ -114,7 +114,7 @@ void genPutArgStkSIMD12(GenTree* treeNode);
 #endif // _TARGET_X86_
 #endif // FEATURE_SIMD
 
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
 void genHWIntrinsic(GenTreeHWIntrinsic* node);
 void genSSEIntrinsic(GenTreeHWIntrinsic* node);
 void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
index aaebe35..347062c 100644 (file)
@@ -2116,7 +2116,27 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
     SIMDVector3Handle = nullptr;
     SIMDVector4Handle = nullptr;
     SIMDVectorHandle  = nullptr;
-#endif
+#if FEATURE_HW_INTRINSICS
+    // Clear every Vector128<T>/Vector256<T> class handle declared on Compiler,
+    // including the ULong variants, so none of them is left uninitialized.
+    Vector128FloatHandle  = nullptr;
+    Vector128DoubleHandle = nullptr;
+    Vector128IntHandle    = nullptr;
+    Vector128UShortHandle = nullptr;
+    Vector128UByteHandle  = nullptr;
+    Vector128ShortHandle  = nullptr;
+    Vector128ByteHandle   = nullptr;
+    Vector128LongHandle   = nullptr;
+    Vector128UIntHandle   = nullptr;
+    Vector128ULongHandle  = nullptr;
+    Vector256FloatHandle  = nullptr;
+    Vector256DoubleHandle = nullptr;
+    Vector256IntHandle    = nullptr;
+    Vector256UShortHandle = nullptr;
+    Vector256UByteHandle  = nullptr;
+    Vector256ShortHandle  = nullptr;
+    Vector256ByteHandle   = nullptr;
+    Vector256LongHandle   = nullptr;
+    Vector256UIntHandle   = nullptr;
+    Vector256ULongHandle  = nullptr;
+#endif // FEATURE_HW_INTRINSICS
+#endif // FEATURE_SIMD
 
     compUsesThrowHelper = false;
 }
index d17211d..5b47689 100644 (file)
@@ -2068,6 +2068,7 @@ public:
                                                    GenTree*       op2,
                                                    NamedIntrinsic hwIntrinsicID);
     GenTree* gtNewMustThrowException(unsigned helper, var_types type);
+    CORINFO_CLASS_HANDLE gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType);
 #endif // FEATURE_HW_INTRINSICS
 
     GenTreePtr gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
@@ -3025,6 +3026,8 @@ protected:
     InstructionSet lookupHWIntrinsicISA(const char* className);
     NamedIntrinsic lookupHWIntrinsic(const char* methodName, InstructionSet isa);
     InstructionSet isaOfHWIntrinsic(NamedIntrinsic intrinsic);
+    bool isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic);
+#ifdef _TARGET_XARCH_
     GenTree* impX86HWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
     GenTree* impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
     GenTree* impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
@@ -3041,6 +3044,7 @@ protected:
     GenTree* impLZCNTIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
     GenTree* impPCLMULQDQIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
     GenTree* impPOPCNTIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
+#endif // _TARGET_XARCH_
 #endif // FEATURE_HW_INTRINSICS
     GenTreePtr impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
                                        CORINFO_SIG_INFO*    sig,
@@ -7393,6 +7397,29 @@ private:
     CORINFO_CLASS_HANDLE SIMDVector4Handle;
     CORINFO_CLASS_HANDLE SIMDVectorHandle;
 
+#if FEATURE_HW_INTRINSICS
+    CORINFO_CLASS_HANDLE Vector128FloatHandle;
+    CORINFO_CLASS_HANDLE Vector128DoubleHandle;
+    CORINFO_CLASS_HANDLE Vector128IntHandle;
+    CORINFO_CLASS_HANDLE Vector128UShortHandle;
+    CORINFO_CLASS_HANDLE Vector128UByteHandle;
+    CORINFO_CLASS_HANDLE Vector128ShortHandle;
+    CORINFO_CLASS_HANDLE Vector128ByteHandle;
+    CORINFO_CLASS_HANDLE Vector128LongHandle;
+    CORINFO_CLASS_HANDLE Vector128UIntHandle;
+    CORINFO_CLASS_HANDLE Vector128ULongHandle;
+    CORINFO_CLASS_HANDLE Vector256FloatHandle;
+    CORINFO_CLASS_HANDLE Vector256DoubleHandle;
+    CORINFO_CLASS_HANDLE Vector256IntHandle;
+    CORINFO_CLASS_HANDLE Vector256UShortHandle;
+    CORINFO_CLASS_HANDLE Vector256UByteHandle;
+    CORINFO_CLASS_HANDLE Vector256ShortHandle;
+    CORINFO_CLASS_HANDLE Vector256ByteHandle;
+    CORINFO_CLASS_HANDLE Vector256LongHandle;
+    CORINFO_CLASS_HANDLE Vector256UIntHandle;
+    CORINFO_CLASS_HANDLE Vector256ULongHandle;
+#endif
+
     // Get the handle for a SIMD type.
     CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, var_types simdBaseType)
     {
index b29e653..fe46c19 100644 (file)
@@ -4796,6 +4796,24 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
     emitAdjustStackDepthPushPop(ins);
 }
 
+#if FEATURE_HW_INTRINSICS
+//------------------------------------------------------------------------
+// emitIns_SIMD_R_R_R: emit a SIMD instruction with a destination register
+// and two source registers.
+//
+// Arguments:
+//    ins      -- the instruction to emit
+//    reg      -- the destination register
+//    reg1     -- the register holding the first source operand
+//    reg2     -- the register holding the second source operand
+//    simdtype -- the SIMD type whose size is used as the operand size
+//
+// Notes:
+//    When VEX encoding is in use and reg1 differs from reg, the
+//    three-register form is emitted directly. Otherwise reg1 is copied
+//    into reg with movaps (when they differ) and the two-register form
+//    is emitted with reg and reg2.
+//
+void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype)
+{
+    emitAttr attr = emitTypeSize(simdtype);
+
+    if (UseVEXEncoding() && reg1 != reg)
+    {
+        emitIns_R_R_R(ins, attr, reg, reg1, reg2);
+    }
+    else
+    {
+        if (reg1 != reg)
+        {
+            // The copy below overwrites reg; if reg2 were the same register,
+            // the second operand would be lost before the instruction reads it.
+            assert(reg2 != reg);
+            emitIns_R_R(INS_movaps, attr, reg, reg1);
+        }
+        emitIns_R_R(ins, attr, reg, reg2);
+    }
+}
+#endif
+
 /*****************************************************************************
  *
  *  The following add instructions referencing stack-based local variables.
index c0ea1c3..746c26b 100644 (file)
@@ -423,6 +423,10 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg,
 
 void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
 
+#if FEATURE_HW_INTRINSICS
+void emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype);
+#endif
+
 #if FEATURE_STACK_FP_X87
 void emitIns_F_F0(instruction ins, unsigned fpreg);
 
index a07fe34..e418723 100644 (file)
@@ -17000,7 +17000,13 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
 #ifdef FEATURE_SIMD
             case GT_SIMD:
                 structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsSIMD()->gtSIMDBaseType);
+                break;
 #endif // FEATURE_SIMD
+#if FEATURE_HW_INTRINSICS
+            case GT_HWIntrinsic:
+                structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
+                break;
+#endif
                 break;
         }
     }
index 86f1ffc..2918bbd 100644 (file)
@@ -1640,6 +1640,15 @@ public:
         return OperIsSIMD(gtOper);
     }
 
+#if FEATURE_HW_INTRINSICS
+    inline bool OperIsSimdHWIntrinsic() const;
+#else
+    inline bool OperIsSimdHWIntrinsic() const
+    {
+        return false;
+    }
+#endif
+
     // This is here for cleaner GT_LONG #ifdefs.
     static bool OperIsLong(genTreeOps gtOper)
     {
@@ -4227,7 +4236,7 @@ struct GenTreeJitIntrinsic : public GenTreeOp
     {
     }
 
-    bool isSIMD()
+    bool isSIMD() const
     {
         return gtSIMDSize != 0;
     }
@@ -4291,6 +4300,17 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
     }
 #endif
 };
+
+//------------------------------------------------------------------------
+// OperIsSimdHWIntrinsic: check whether this node is a GT_HWIntrinsic node
+// for which isSIMD() is true.
+//
+// Return Value:
+//    true if gtOper is GT_HWIntrinsic and the node's isSIMD() returns true;
+//    false otherwise.
+//
+inline bool GenTree::OperIsSimdHWIntrinsic() const
+{
+    if (gtOper != GT_HWIntrinsic)
+    {
+        return false;
+    }
+
+    // AsHWIntrinsic() is not declared const, so "this" is cast directly.
+    return reinterpret_cast<const GenTreeHWIntrinsic*>(this)->isSIMD();
+}
 #endif // FEATURE_HW_INTRINSICS
 
 /* gtIndex -- array access */
index 763647e..52c0619 100644 (file)
@@ -83,12 +83,90 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
 void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
 {
-    NYI("Implement SSE intrinsic code generation");
+    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    regNumber      targetReg   = node->gtRegNum;
+    var_types      targetType  = node->TypeGet();
+    var_types      baseType    = node->gtSIMDBaseType;
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = REG_NA;
+    emitter*  emit   = getEmitter();
+
+    genConsumeOperands(node);
+
+    switch (intrinsicID)
+    {
+        case NI_SSE_Add:
+            assert(baseType == TYP_FLOAT);
+            op2Reg = op2->gtRegNum;
+            emit->emitIns_SIMD_R_R_R(INS_addps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
+            break;
+        default:
+            unreached();
+            break;
+    }
+    genProduceReg(node);
 }
 
 void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
 {
-    NYI("Implement SSE2 intrinsic code generation");
+    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    regNumber      targetReg   = node->gtRegNum;
+    var_types      targetType  = node->TypeGet();
+    var_types      baseType    = node->gtSIMDBaseType;
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = REG_NA;
+    emitter*  emit   = getEmitter();
+
+    genConsumeOperands(node);
+
+    switch (intrinsicID)
+    {
+        case NI_SSE2_Add:
+        {
+            op2Reg = op2->gtRegNum;
+
+            instruction ins;
+            switch (baseType)
+            {
+                case TYP_DOUBLE:
+                    ins = INS_addpd;
+                    break;
+                case TYP_INT:
+                case TYP_UINT:
+                    ins = INS_paddd;
+                    break;
+                case TYP_LONG:
+                case TYP_ULONG:
+                    ins = INS_paddq;
+                    break;
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                    ins = INS_paddb;
+                    break;
+                case TYP_CHAR:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    ins = INS_paddw;
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            emit->emitIns_SIMD_R_R_R(ins, targetReg, op1Reg, op2Reg, TYP_SIMD16);
+            break;
+        }
+        default:
+            unreached();
+            break;
+    }
+    genProduceReg(node);
 }
 
 void CodeGen::genSSE3Intrinsic(GenTreeHWIntrinsic* node)
@@ -150,12 +228,103 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
 
 void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
 {
-    NYI("Implement AVX intrinsic code generation");
+    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    regNumber      targetReg   = node->gtRegNum;
+    var_types      targetType  = node->TypeGet();
+    var_types      baseType    = node->gtSIMDBaseType;
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = REG_NA;
+
+    genConsumeOperands(node);
+
+    emitter* emit = getEmitter();
+    switch (intrinsicID)
+    {
+        case NI_AVX_Add:
+        {
+            op2Reg = op2->gtRegNum;
+
+            instruction ins;
+            switch (baseType)
+            {
+                case TYP_DOUBLE:
+                    ins = INS_addpd;
+                    break;
+                case TYP_FLOAT:
+                    ins = INS_addps;
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            emit->emitIns_R_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op2Reg);
+            break;
+        }
+        default:
+            unreached();
+            break;
+    }
+    genProduceReg(node);
 }
 
 void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
 {
-    NYI("Implement AVX2 intrinsic code generation");
+    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    regNumber      targetReg   = node->gtRegNum;
+    var_types      targetType  = node->TypeGet();
+    var_types      baseType    = node->gtSIMDBaseType;
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = REG_NA;
+
+    genConsumeOperands(node);
+
+    emitter* emit = getEmitter();
+    switch (intrinsicID)
+    {
+        case NI_AVX2_Add:
+        {
+            op2Reg = op2->gtRegNum;
+
+            instruction ins;
+            switch (baseType)
+            {
+                case TYP_INT:
+                case TYP_UINT:
+                    ins = INS_paddd;
+                    break;
+                case TYP_LONG:
+                case TYP_ULONG:
+                    ins = INS_paddq;
+                    break;
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                    ins = INS_paddb;
+                    break;
+                case TYP_CHAR:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    ins = INS_paddw;
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            emit->emitIns_R_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op2Reg);
+            break;
+        }
+        default:
+            unreached();
+            break;
+    }
+    genProduceReg(node);
 }
 
 void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node)
index 7db3e5c..e834f7a 100644 (file)
 //                  Intrinsic ID                   Function name                ISA   
 //  SSE Intrinsics          
 HARDWARE_INTRINSIC(SSE_IsSupported,             "get_IsSupported",              SSE)
+HARDWARE_INTRINSIC(SSE_Add,                     "Add",                          SSE)
 
 //  SSE2 Intrinsics 
 HARDWARE_INTRINSIC(SSE2_IsSupported,            "get_IsSupported",              SSE2)
+HARDWARE_INTRINSIC(SSE2_Add,                    "Add",                          SSE2)
 
 //  SSE3 Intrinsics 
 HARDWARE_INTRINSIC(SSE3_IsSupported,            "get_IsSupported",              SSE3)
@@ -33,9 +35,11 @@ HARDWARE_INTRINSIC(SSE42_Crc32,                 "Crc32",
 
 //  AVX Intrinsics 
 HARDWARE_INTRINSIC(AVX_IsSupported,             "get_IsSupported",              AVX)
+HARDWARE_INTRINSIC(AVX_Add,                     "Add",                          AVX)
 
 //  AVX2 Intrinsics 
 HARDWARE_INTRINSIC(AVX2_IsSupported,            "get_IsSupported",              AVX2)
+HARDWARE_INTRINSIC(AVX2_Add,                    "Add",                          AVX2)
 
 //  AES Intrinsics 
 HARDWARE_INTRINSIC(AES_IsSupported,             "get_IsSupported",              AES)
index 15888c9..d76ce77 100644 (file)
@@ -153,6 +153,40 @@ InstructionSet Compiler::isaOfHWIntrinsic(NamedIntrinsic intrinsic)
 }
 
 //------------------------------------------------------------------------
+// isIntrinsicAnIsSupportedPropertyGetter: check whether an intrinsic id is one of
+// the per-ISA "get_IsSupported" ids
+//
+// Arguments:
+//    intrinsic -- id of the intrinsic function.
+//
+// Return Value:
+//    true if the intrinsic is one of the NI_*_IsSupported ids listed below,
+//    false otherwise.
+//
+// Notes:
+//    "get_IsSupported" sometimes needs to be treated specially.
+//
+bool Compiler::isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic)
+{
+    bool isGetter = false;
+
+    switch (intrinsic)
+    {
+        case NI_SSE_IsSupported:
+        case NI_SSE2_IsSupported:
+        case NI_SSE3_IsSupported:
+        case NI_SSSE3_IsSupported:
+        case NI_SSE41_IsSupported:
+        case NI_SSE42_IsSupported:
+        case NI_AVX_IsSupported:
+        case NI_AVX2_IsSupported:
+        case NI_AES_IsSupported:
+        case NI_BMI1_IsSupported:
+        case NI_BMI2_IsSupported:
+        case NI_FMA_IsSupported:
+        case NI_LZCNT_IsSupported:
+        case NI_PCLMULQDQ_IsSupported:
+        case NI_POPCNT_IsSupported:
+            isGetter = true;
+            break;
+
+        default:
+            break;
+    }
+
+    return isGetter;
+}
+
+//------------------------------------------------------------------------
 // impX86HWIntrinsic: dispatch hardware intrinsics to their own implementation
 // function
 //
@@ -167,7 +201,12 @@ InstructionSet Compiler::isaOfHWIntrinsic(NamedIntrinsic intrinsic)
 GenTree* Compiler::impX86HWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
 {
     InstructionSet isa = isaOfHWIntrinsic(intrinsic);
-    if (!compSupports(isa) && strcmp("get_IsSupported", getHWIntrinsicName(intrinsic)) != 0)
+    // Will throw PlatformNotSupportedException if
+    // - calling hardware intrinsics on unsupported hardware
+    // - calling SIMD hardware intrinsics with featureSIMD=false
+    if ((!compSupports(isa) || (!featureSIMD && isa != InstructionSet_BMI1 && isa != InstructionSet_BMI2 &&
+                                isa != InstructionSet_LZCNT && isa != InstructionSet_POPCNT)) &&
+        !isIntrinsicAnIsSupportedPropertyGetter(intrinsic))
     {
         for (unsigned i = 0; i < sig->numArgs; i++)
         {
@@ -213,28 +252,120 @@ GenTree* Compiler::impX86HWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
     }
 }
 
+//------------------------------------------------------------------------
+// gtGetStructHandleForHWSIMD: map a SIMD type and base type to the matching
+// Vector128<T>/Vector256<T> class handle field
+//
+// Arguments:
+//    simdType     -- TYP_SIMD16 selects the Vector128 handles, TYP_SIMD32 the
+//                    Vector256 handles
+//    simdBaseType -- the element type of the vector
+//
+// Return Value:
+//    The handle field for the given combination, or NO_CLASS_HANDLE when
+//    simdType is neither TYP_SIMD16 nor TYP_SIMD32 or the base type has no
+//    handle field (the latter also asserts).
+//
+CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType)
+{
+    const bool isVector128 = (simdType == TYP_SIMD16);
+
+    if (!isVector128 && (simdType != TYP_SIMD32))
+    {
+        return NO_CLASS_HANDLE;
+    }
+
+    switch (simdBaseType)
+    {
+        case TYP_FLOAT:
+            return isVector128 ? Vector128FloatHandle : Vector256FloatHandle;
+        case TYP_DOUBLE:
+            return isVector128 ? Vector128DoubleHandle : Vector256DoubleHandle;
+        case TYP_INT:
+            return isVector128 ? Vector128IntHandle : Vector256IntHandle;
+        case TYP_CHAR:
+        case TYP_USHORT:
+            return isVector128 ? Vector128UShortHandle : Vector256UShortHandle;
+        case TYP_UBYTE:
+            return isVector128 ? Vector128UByteHandle : Vector256UByteHandle;
+        case TYP_SHORT:
+            return isVector128 ? Vector128ShortHandle : Vector256ShortHandle;
+        case TYP_BYTE:
+            return isVector128 ? Vector128ByteHandle : Vector256ByteHandle;
+        case TYP_LONG:
+            return isVector128 ? Vector128LongHandle : Vector256LongHandle;
+        case TYP_UINT:
+            return isVector128 ? Vector128UIntHandle : Vector256UIntHandle;
+        case TYP_ULONG:
+            return isVector128 ? Vector128ULongHandle : Vector256ULongHandle;
+        default:
+            assert(!"Didn't find a class handle for simdType");
+            break;
+    }
+
+    return NO_CLASS_HANDLE;
+}
+
 GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
 {
+    GenTree* retNode = nullptr;
+    GenTree* op1     = nullptr;
+    GenTree* op2     = nullptr;
     switch (intrinsic)
     {
         case NI_SSE_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_SSE));
+            retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE));
+            break;
+
+        case NI_SSE_Add:
+            assert(sig->numArgs == 2);
+            op2     = impSIMDPopStack(TYP_SIMD16);
+            op1     = impSIMDPopStack(TYP_SIMD16);
+            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE_Add, TYP_FLOAT, 16);
+            break;
 
         default:
-            return nullptr;
+            JITDUMP("Not implemented hardware intrinsic");
+            break;
     }
+    return retNode;
 }
 
 GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
 {
+    GenTree*  retNode  = nullptr;
+    GenTree*  op1      = nullptr;
+    GenTree*  op2      = nullptr;
+    var_types baseType = TYP_UNKNOWN;
     switch (intrinsic)
     {
         case NI_SSE2_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_SSE2));
+            retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE2));
+            break;
+
+        case NI_SSE2_Add:
+            assert(sig->numArgs == 2);
+            op2      = impSIMDPopStack(TYP_SIMD16);
+            op1      = impSIMDPopStack(TYP_SIMD16);
+            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+            retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_Add, baseType, 16);
+            break;
 
         default:
-            return nullptr;
+            JITDUMP("Not implemented hardware intrinsic");
+            break;
     }
+    return retNode;
 }
 
 GenTree* Compiler::impSSE3Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
@@ -242,7 +373,7 @@ GenTree* Compiler::impSSE3Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN
     switch (intrinsic)
     {
         case NI_SSE3_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_SSE3));
+            return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE3));
 
         default:
             return nullptr;
@@ -254,7 +385,7 @@ GenTree* Compiler::impSSSE3Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
     switch (intrinsic)
     {
         case NI_SSSE3_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_SSSE3));
+            return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSSE3));
 
         default:
             return nullptr;
@@ -266,7 +397,7 @@ GenTree* Compiler::impSSE41Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
     switch (intrinsic)
     {
         case NI_SSE41_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_SSE41));
+            return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE41));
 
         default:
             return nullptr;
@@ -286,7 +417,7 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
     switch (intrinsic)
     {
         case NI_SSE42_IsSupported:
-            retNode = gtNewIconNode(compSupports(InstructionSet_SSE42));
+            retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE42));
             break;
 
         case NI_SSE42_Crc32:
@@ -318,26 +449,56 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
 
 GenTree* Compiler::impAVXIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
 {
+    GenTree*  retNode  = nullptr;
+    GenTree*  op1      = nullptr;
+    GenTree*  op2      = nullptr;
+    var_types baseType = TYP_UNKNOWN;
     switch (intrinsic)
     {
         case NI_AVX_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_AVX));
+            retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_AVX));
+            break;
+
+        case NI_AVX_Add:
+            assert(sig->numArgs == 2);
+            op2      = impSIMDPopStack(TYP_SIMD32);
+            op1      = impSIMDPopStack(TYP_SIMD32);
+            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+            retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX_Add, baseType, 32);
+            break;
 
         default:
-            return nullptr;
+            JITDUMP("Not implemented hardware intrinsic");
+            break;
     }
+    return retNode;
 }
 
 GenTree* Compiler::impAVX2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
 {
+    GenTree*  retNode  = nullptr;
+    GenTree*  op1      = nullptr;
+    GenTree*  op2      = nullptr;
+    var_types baseType = TYP_UNKNOWN;
     switch (intrinsic)
     {
         case NI_AVX2_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_AVX2));
+            retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_AVX2));
+            break;
+
+        case NI_AVX2_Add:
+            assert(sig->numArgs == 2);
+            op2      = impSIMDPopStack(TYP_SIMD32);
+            op1      = impSIMDPopStack(TYP_SIMD32);
+            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+            retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX2_Add, baseType, 32);
+            break;
 
         default:
-            return nullptr;
+            JITDUMP("Not implemented hardware intrinsic");
+            break;
     }
+    return retNode;
 }
 
 GenTree* Compiler::impAESIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
@@ -345,7 +506,7 @@ GenTree* Compiler::impAESIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND
     switch (intrinsic)
     {
         case NI_AES_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_AES));
+            return gtNewIconNode(featureSIMD && compSupports(InstructionSet_AES));
 
         default:
             return nullptr;
@@ -381,7 +542,7 @@ GenTree* Compiler::impFMAIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND
     switch (intrinsic)
     {
         case NI_FMA_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_FMA));
+            return gtNewIconNode(featureSIMD && compSupports(InstructionSet_FMA));
 
         default:
             return nullptr;
@@ -424,7 +585,7 @@ GenTree* Compiler::impPCLMULQDQIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHO
     switch (intrinsic)
     {
         case NI_PCLMULQDQ_IsSupported:
-            return gtNewIconNode(compSupports(InstructionSet_PCLMULQDQ));
+            return gtNewIconNode(featureSIMD && compSupports(InstructionSet_PCLMULQDQ));
 
         default:
             return nullptr;
index f7513d4..e3f106a 100644 (file)
@@ -1083,14 +1083,16 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr           destAddr,
     assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
            src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
            src->gtOper == GT_COMMA || src->gtOper == GT_ADDR ||
-           (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+           (src->TypeGet() != TYP_STRUCT &&
+            (GenTree::OperIsSIMD(src->gtOper) || src->OperIsSimdHWIntrinsic() || src->gtOper == GT_LCL_FLD)));
 #else  // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
     assert(varTypeIsStruct(src));
 
     assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
            src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
            src->gtOper == GT_COMMA ||
-           (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+           (src->TypeGet() != TYP_STRUCT &&
+            (GenTree::OperIsSIMD(src->gtOper) || src->OperIsSimdHWIntrinsic() || src->gtOper == GT_LCL_FLD)));
 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
     if (destAddr->OperGet() == GT_ADDR)
     {
@@ -1596,6 +1598,11 @@ GenTreePtr Compiler::impNormStructVal(GenTreePtr           structVal,
             assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
             break;
 #endif // FEATURE_SIMD
+#if FEATURE_HW_INTRINSICS
+        case GT_HWIntrinsic:
+            assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
+            break;
+#endif
 
         case GT_COMMA:
         {
@@ -1624,6 +1631,14 @@ GenTreePtr Compiler::impNormStructVal(GenTreePtr           structVal,
             }
             else
 #endif
+#if FEATURE_HW_INTRINSICS
+                if (blockNode->OperGet() == GT_HWIntrinsic && blockNode->AsHWIntrinsic()->isSIMD())
+            {
+                parent->gtOp.gtOp2 = impNormStructVal(blockNode, structHnd, curLevel, forceNormalization);
+                alreadyNormalized  = true;
+            }
+            else
+#endif
             {
                 assert(blockNode->OperIsBlk());
 
@@ -3852,7 +3867,7 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
     {
         assert(retNode == nullptr);
         const NamedIntrinsic ni = lookupNamedIntrinsic(method);
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
         if (ni > NI_HW_INTRINSIC_START && ni < NI_HW_INTRINSIC_END)
         {
             return impX86HWIntrinsic(ni, method, sig);
@@ -4080,7 +4095,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
         }
     }
 
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
     if ((namespaceName != nullptr) && strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0)
     {
         InstructionSet isa = lookupHWIntrinsicISA(className);
index 970793d..37388d4 100644 (file)
@@ -388,7 +388,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
                 addr->ClearContained();
             }
         }
-        else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+        else if (!source->IsMultiRegCall() && !source->OperIsSIMD() && !source->OperIsSimdHWIntrinsic())
         {
             assert(source->IsLocal());
             MakeSrcContained(blkNode, source);
index 68c587b..4265289 100644 (file)
@@ -10175,7 +10175,7 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
                 needsIndirection = false;
                 effectiveVal     = indirTree->Addr()->gtGetOp1();
             }
-            if (effectiveVal->OperIsSIMD())
+            if (effectiveVal->OperIsSIMD() || effectiveVal->OperIsSimdHWIntrinsic())
             {
                 needsIndirection = false;
             }
index 568a8f7..6019972 100644 (file)
@@ -129,236 +129,493 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
         return TYP_UNKNOWN;
     }
 
-#if FEATURE_HW_INTRINSICS && DEBUG
-    if (isIntrinsicType(typeHnd))
-    {
-        JITDUMP("\nFound Vector Type: %s with base type %s\n", getClassNameFromMetadata(typeHnd, nullptr),
-                getClassNameFromMetadata(getTypeInstantiationArgument(typeHnd, 0), nullptr));
-    }
-#endif
-
     // fast path search using cached type handles of important types
     var_types simdBaseType = TYP_UNKNOWN;
     unsigned  size         = 0;
 
-    // Early return if it is not a SIMD module.
-    if (!isSIMDClass(typeHnd))
-    {
-        return TYP_UNKNOWN;
-    }
-
-    // The most likely to be used type handles are looked up first followed by
-    // less likely to be used type handles
-    if (typeHnd == SIMDFloatHandle)
-    {
-        simdBaseType = TYP_FLOAT;
-        JITDUMP("  Known type SIMD Vector<Float>\n");
-    }
-    else if (typeHnd == SIMDIntHandle)
-    {
-        simdBaseType = TYP_INT;
-        JITDUMP("  Known type SIMD Vector<Int>\n");
-    }
-    else if (typeHnd == SIMDVector2Handle)
-    {
-        simdBaseType = TYP_FLOAT;
-        size         = 2 * genTypeSize(TYP_FLOAT);
-        assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
-        JITDUMP("  Known type Vector2\n");
-    }
-    else if (typeHnd == SIMDVector3Handle)
-    {
-        simdBaseType = TYP_FLOAT;
-        size         = 3 * genTypeSize(TYP_FLOAT);
-        assert(size == info.compCompHnd->getClassSize(typeHnd));
-        JITDUMP("  Known type Vector3\n");
-    }
-    else if (typeHnd == SIMDVector4Handle)
-    {
-        simdBaseType = TYP_FLOAT;
-        size         = 4 * genTypeSize(TYP_FLOAT);
-        assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
-        JITDUMP("  Known type Vector4\n");
-    }
-    else if (typeHnd == SIMDVectorHandle)
-    {
-        JITDUMP("  Known type Vector\n");
-    }
-    else if (typeHnd == SIMDUShortHandle)
-    {
-        simdBaseType = TYP_CHAR;
-        JITDUMP("  Known type SIMD Vector<ushort>\n");
-    }
-    else if (typeHnd == SIMDUByteHandle)
+    // TODO - Optimize SIMD type recognition by IntrinsicAttribute
+    if (isSIMDClass(typeHnd))
     {
-        simdBaseType = TYP_UBYTE;
-        JITDUMP("  Known type SIMD Vector<ubyte>\n");
-    }
-    else if (typeHnd == SIMDDoubleHandle)
-    {
-        simdBaseType = TYP_DOUBLE;
-        JITDUMP("  Known type SIMD Vector<Double>\n");
-    }
-    else if (typeHnd == SIMDLongHandle)
-    {
-        simdBaseType = TYP_LONG;
-        JITDUMP("  Known type SIMD Vector<Long>\n");
-    }
-    else if (typeHnd == SIMDShortHandle)
-    {
-        simdBaseType = TYP_SHORT;
-        JITDUMP("  Known type SIMD Vector<short>\n");
-    }
-    else if (typeHnd == SIMDByteHandle)
-    {
-        simdBaseType = TYP_BYTE;
-        JITDUMP("  Known type SIMD Vector<byte>\n");
-    }
-    else if (typeHnd == SIMDUIntHandle)
-    {
-        simdBaseType = TYP_UINT;
-        JITDUMP("  Known type SIMD Vector<uint>\n");
-    }
-    else if (typeHnd == SIMDULongHandle)
-    {
-        simdBaseType = TYP_ULONG;
-        JITDUMP("  Known type SIMD Vector<ulong>\n");
-    }
-
-    // slow path search
-    if (simdBaseType == TYP_UNKNOWN)
-    {
-        // Doesn't match with any of the cached type handles.
-        // Obtain base type by parsing fully qualified class name.
-        //
-        // TODO-Throughput: implement product shipping solution to query base type.
-        WCHAR  className[256] = {0};
-        WCHAR* pbuf           = &className[0];
-        int    len            = _countof(className);
-        info.compCompHnd->appendClassName(&pbuf, &len, typeHnd, TRUE, FALSE, FALSE);
-        noway_assert(pbuf < &className[256]);
-        JITDUMP("SIMD Candidate Type %S\n", className);
+        // The most likely to be used type handles are looked up first followed by
+        // less likely to be used type handles
+        if (typeHnd == SIMDFloatHandle)
+        {
+            simdBaseType = TYP_FLOAT;
+            JITDUMP("  Known type SIMD Vector<Float>\n");
+        }
+        else if (typeHnd == SIMDIntHandle)
+        {
+            simdBaseType = TYP_INT;
+            JITDUMP("  Known type SIMD Vector<Int>\n");
+        }
+        else if (typeHnd == SIMDVector2Handle)
+        {
+            simdBaseType = TYP_FLOAT;
+            size         = 2 * genTypeSize(TYP_FLOAT);
+            assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+            JITDUMP("  Known type Vector2\n");
+        }
+        else if (typeHnd == SIMDVector3Handle)
+        {
+            simdBaseType = TYP_FLOAT;
+            size         = 3 * genTypeSize(TYP_FLOAT);
+            assert(size == info.compCompHnd->getClassSize(typeHnd));
+            JITDUMP("  Known type Vector3\n");
+        }
+        else if (typeHnd == SIMDVector4Handle)
+        {
+            simdBaseType = TYP_FLOAT;
+            size         = 4 * genTypeSize(TYP_FLOAT);
+            assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+            JITDUMP("  Known type Vector4\n");
+        }
+        else if (typeHnd == SIMDVectorHandle)
+        {
+            JITDUMP("  Known type Vector\n");
+        }
+        else if (typeHnd == SIMDUShortHandle)
+        {
+            simdBaseType = TYP_CHAR;
+            JITDUMP("  Known type SIMD Vector<ushort>\n");
+        }
+        else if (typeHnd == SIMDUByteHandle)
+        {
+            simdBaseType = TYP_UBYTE;
+            JITDUMP("  Known type SIMD Vector<ubyte>\n");
+        }
+        else if (typeHnd == SIMDDoubleHandle)
+        {
+            simdBaseType = TYP_DOUBLE;
+            JITDUMP("  Known type SIMD Vector<Double>\n");
+        }
+        else if (typeHnd == SIMDLongHandle)
+        {
+            simdBaseType = TYP_LONG;
+            JITDUMP("  Known type SIMD Vector<Long>\n");
+        }
+        else if (typeHnd == SIMDShortHandle)
+        {
+            simdBaseType = TYP_SHORT;
+            JITDUMP("  Known type SIMD Vector<short>\n");
+        }
+        else if (typeHnd == SIMDByteHandle)
+        {
+            simdBaseType = TYP_BYTE;
+            JITDUMP("  Known type SIMD Vector<byte>\n");
+        }
+        else if (typeHnd == SIMDUIntHandle)
+        {
+            simdBaseType = TYP_UINT;
+            JITDUMP("  Known type SIMD Vector<uint>\n");
+        }
+        else if (typeHnd == SIMDULongHandle)
+        {
+            simdBaseType = TYP_ULONG;
+            JITDUMP("  Known type SIMD Vector<ulong>\n");
+        }
 
-        if (wcsncmp(className, W("System.Numerics."), 16) == 0)
+        // slow path search
+        if (simdBaseType == TYP_UNKNOWN)
         {
-            if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
+            // Doesn't match with any of the cached type handles.
+            // Obtain base type by parsing fully qualified class name.
+            //
+            // TODO-Throughput: implement product shipping solution to query base type.
+            WCHAR  className[256] = {0};
+            WCHAR* pbuf           = &className[0];
+            int    len            = _countof(className);
+            info.compCompHnd->appendClassName(&pbuf, &len, typeHnd, TRUE, FALSE, FALSE);
+            noway_assert(pbuf < &className[256]);
+            JITDUMP("SIMD Candidate Type %S\n", className);
+
+            if (wcsncmp(className, W("System.Numerics."), 16) == 0)
             {
-                if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
-                {
-                    SIMDFloatHandle = typeHnd;
-                    simdBaseType    = TYP_FLOAT;
-                    JITDUMP("  Found type SIMD Vector<Float>\n");
-                }
-                else if (wcsncmp(&(className[25]), W("System.Int32"), 12) == 0)
-                {
-                    SIMDIntHandle = typeHnd;
-                    simdBaseType  = TYP_INT;
-                    JITDUMP("  Found type SIMD Vector<Int>\n");
-                }
-                else if (wcsncmp(&(className[25]), W("System.UInt16"), 13) == 0)
+                if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
                 {
-                    SIMDUShortHandle = typeHnd;
-                    simdBaseType     = TYP_CHAR;
-                    JITDUMP("  Found type SIMD Vector<ushort>\n");
-                }
-                else if (wcsncmp(&(className[25]), W("System.Byte"), 11) == 0)
-                {
-                    SIMDUByteHandle = typeHnd;
-                    simdBaseType    = TYP_UBYTE;
-                    JITDUMP("  Found type SIMD Vector<ubyte>\n");
-                }
-                else if (wcsncmp(&(className[25]), W("System.Double"), 13) == 0)
-                {
-                    SIMDDoubleHandle = typeHnd;
-                    simdBaseType     = TYP_DOUBLE;
-                    JITDUMP("  Found type SIMD Vector<Double>\n");
-                }
-                else if (wcsncmp(&(className[25]), W("System.Int64"), 12) == 0)
-                {
-                    SIMDLongHandle = typeHnd;
-                    simdBaseType   = TYP_LONG;
-                    JITDUMP("  Found type SIMD Vector<Long>\n");
+                    if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
+                    {
+                        SIMDFloatHandle = typeHnd;
+                        simdBaseType    = TYP_FLOAT;
+                        JITDUMP("  Found type SIMD Vector<Float>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.Int32"), 12) == 0)
+                    {
+                        SIMDIntHandle = typeHnd;
+                        simdBaseType  = TYP_INT;
+                        JITDUMP("  Found type SIMD Vector<Int>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.UInt16"), 13) == 0)
+                    {
+                        SIMDUShortHandle = typeHnd;
+                        simdBaseType     = TYP_CHAR;
+                        JITDUMP("  Found type SIMD Vector<ushort>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.Byte"), 11) == 0)
+                    {
+                        SIMDUByteHandle = typeHnd;
+                        simdBaseType    = TYP_UBYTE;
+                        JITDUMP("  Found type SIMD Vector<ubyte>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.Double"), 13) == 0)
+                    {
+                        SIMDDoubleHandle = typeHnd;
+                        simdBaseType     = TYP_DOUBLE;
+                        JITDUMP("  Found type SIMD Vector<Double>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.Int64"), 12) == 0)
+                    {
+                        SIMDLongHandle = typeHnd;
+                        simdBaseType   = TYP_LONG;
+                        JITDUMP("  Found type SIMD Vector<Long>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.Int16"), 12) == 0)
+                    {
+                        SIMDShortHandle = typeHnd;
+                        simdBaseType    = TYP_SHORT;
+                        JITDUMP("  Found type SIMD Vector<short>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.SByte"), 12) == 0)
+                    {
+                        SIMDByteHandle = typeHnd;
+                        simdBaseType   = TYP_BYTE;
+                        JITDUMP("  Found type SIMD Vector<byte>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.UInt32"), 13) == 0)
+                    {
+                        SIMDUIntHandle = typeHnd;
+                        simdBaseType   = TYP_UINT;
+                        JITDUMP("  Found type SIMD Vector<uint>\n");
+                    }
+                    else if (wcsncmp(&(className[25]), W("System.UInt64"), 13) == 0)
+                    {
+                        SIMDULongHandle = typeHnd;
+                        simdBaseType    = TYP_ULONG;
+                        JITDUMP("  Found type SIMD Vector<ulong>\n");
+                    }
+                    else
+                    {
+                        JITDUMP("  Unknown SIMD Vector<T>\n");
+                    }
                 }
-                else if (wcsncmp(&(className[25]), W("System.Int16"), 12) == 0)
+                else if (wcsncmp(&(className[16]), W("Vector2"), 8) == 0)
                 {
-                    SIMDShortHandle = typeHnd;
-                    simdBaseType    = TYP_SHORT;
-                    JITDUMP("  Found type SIMD Vector<short>\n");
+                    SIMDVector2Handle = typeHnd;
+
+                    simdBaseType = TYP_FLOAT;
+                    size         = 2 * genTypeSize(TYP_FLOAT);
+                    assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+                    JITDUMP(" Found Vector2\n");
                 }
-                else if (wcsncmp(&(className[25]), W("System.SByte"), 12) == 0)
+                else if (wcsncmp(&(className[16]), W("Vector3"), 8) == 0)
                 {
-                    SIMDByteHandle = typeHnd;
-                    simdBaseType   = TYP_BYTE;
-                    JITDUMP("  Found type SIMD Vector<byte>\n");
+                    SIMDVector3Handle = typeHnd;
+
+                    simdBaseType = TYP_FLOAT;
+                    size         = 3 * genTypeSize(TYP_FLOAT);
+                    assert(size == info.compCompHnd->getClassSize(typeHnd));
+                    JITDUMP(" Found Vector3\n");
                 }
-                else if (wcsncmp(&(className[25]), W("System.UInt32"), 13) == 0)
+                else if (wcsncmp(&(className[16]), W("Vector4"), 8) == 0)
                 {
-                    SIMDUIntHandle = typeHnd;
-                    simdBaseType   = TYP_UINT;
-                    JITDUMP("  Found type SIMD Vector<uint>\n");
+                    SIMDVector4Handle = typeHnd;
+
+                    simdBaseType = TYP_FLOAT;
+                    size         = 4 * genTypeSize(TYP_FLOAT);
+                    assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+                    JITDUMP(" Found Vector4\n");
                 }
-                else if (wcsncmp(&(className[25]), W("System.UInt64"), 13) == 0)
+                else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
                 {
-                    SIMDULongHandle = typeHnd;
-                    simdBaseType    = TYP_ULONG;
-                    JITDUMP("  Found type SIMD Vector<ulong>\n");
+                    SIMDVectorHandle = typeHnd;
+                    JITDUMP(" Found type Vector\n");
                 }
                 else
                 {
-                    JITDUMP("  Unknown SIMD Vector<T>\n");
+                    JITDUMP("  Unknown SIMD Type\n");
                 }
             }
-            else if (wcsncmp(&(className[16]), W("Vector2"), 8) == 0)
+        }
+        if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
+        {
+            // If not a fixed size vector then its size is same as SIMD vector
+            // register length in bytes
+            if (size == 0)
             {
-                SIMDVector2Handle = typeHnd;
-
-                simdBaseType = TYP_FLOAT;
-                size         = 2 * genTypeSize(TYP_FLOAT);
-                assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
-                JITDUMP(" Found Vector2\n");
+                size = getSIMDVectorRegisterByteLength();
             }
-            else if (wcsncmp(&(className[16]), W("Vector3"), 8) == 0)
-            {
-                SIMDVector3Handle = typeHnd;
 
-                simdBaseType = TYP_FLOAT;
-                size         = 3 * genTypeSize(TYP_FLOAT);
-                assert(size == info.compCompHnd->getClassSize(typeHnd));
-                JITDUMP(" Found Vector3\n");
-            }
-            else if (wcsncmp(&(className[16]), W("Vector4"), 8) == 0)
-            {
-                SIMDVector4Handle = typeHnd;
+            *sizeBytes = size;
+            setUsesSIMDTypes(true);
+        }
+    }
+#if FEATURE_HW_INTRINSICS
+    else if (isIntrinsicType(typeHnd))
+    {
+        if (typeHnd == Vector256FloatHandle)
+        {
+            simdBaseType = TYP_FLOAT;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<float>\n");
+        }
+        else if (typeHnd == Vector256DoubleHandle)
+        {
+            simdBaseType = TYP_DOUBLE;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<double>\n");
+        }
+        else if (typeHnd == Vector256IntHandle)
+        {
+            simdBaseType = TYP_INT;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<int>\n");
+        }
+        else if (typeHnd == Vector256UIntHandle)
+        {
+            simdBaseType = TYP_UINT;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<uint>\n");
+        }
+        else if (typeHnd == Vector256ShortHandle)
+        {
+            simdBaseType = TYP_SHORT;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<short>\n");
+        }
+        else if (typeHnd == Vector256UShortHandle)
+        {
+            simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<ushort>\n");
+        }
+        else if (typeHnd == Vector256ByteHandle)
+        {
+            simdBaseType = TYP_BYTE;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<sbyte>\n");
+        }
+        else if (typeHnd == Vector256UByteHandle)
+        {
+            simdBaseType = TYP_UBYTE;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<byte>\n");
+        }
+        else if (typeHnd == Vector256LongHandle)
+        {
+            simdBaseType = TYP_LONG;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<long>\n");
+        }
+        else if (typeHnd == Vector256ULongHandle)
+        {
+            simdBaseType = TYP_ULONG;
+            size         = YMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector256<ulong>\n");
+        }
+        else if (typeHnd == Vector128FloatHandle) // fast path for the handle cached by the Vector128`1 slow path
+        {
+            simdBaseType = TYP_FLOAT;
+            size         = XMM_REGSIZE_BYTES; // Vector128<T> is XMM-sized, matching the other Vector128 branches
+            JITDUMP("  Known type Vector128<float>\n");
+        }
+        else if (typeHnd == Vector128DoubleHandle)
+        {
+            simdBaseType = TYP_DOUBLE;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<double>\n");
+        }
+        else if (typeHnd == Vector128IntHandle)
+        {
+            simdBaseType = TYP_INT;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<int>\n");
+        }
+        else if (typeHnd == Vector128UIntHandle)
+        {
+            simdBaseType = TYP_UINT;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<uint>\n");
+        }
+        else if (typeHnd == Vector128ShortHandle)
+        {
+            simdBaseType = TYP_SHORT;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<short>\n");
+        }
+        else if (typeHnd == Vector128UShortHandle)
+        {
+            simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<ushort>\n");
+        }
+        else if (typeHnd == Vector128ByteHandle)
+        {
+            simdBaseType = TYP_BYTE;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<sbyte>\n");
+        }
+        else if (typeHnd == Vector128UByteHandle)
+        {
+            simdBaseType = TYP_UBYTE;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<byte>\n");
+        }
+        else if (typeHnd == Vector128LongHandle)
+        {
+            simdBaseType = TYP_LONG;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<long>\n");
+        }
+        else if (typeHnd == Vector128ULongHandle)
+        {
+            simdBaseType = TYP_ULONG;
+            size         = XMM_REGSIZE_BYTES;
+            JITDUMP("  Known type Vector128<ulong>\n");
+        }
 
-                simdBaseType = TYP_FLOAT;
-                size         = 4 * genTypeSize(TYP_FLOAT);
-                assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
-                JITDUMP(" Found Vector4\n");
-            }
-            else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
-            {
-                SIMDVectorHandle = typeHnd;
-                JITDUMP(" Found type Vector\n");
-            }
-            else
+        // slow path search
+        if (simdBaseType == TYP_UNKNOWN)
+        {
+            // Doesn't match with any of the cached type handles.
+            const char*          className   = getClassNameFromMetadata(typeHnd, nullptr);
+            CORINFO_CLASS_HANDLE baseTypeHnd = getTypeInstantiationArgument(typeHnd, 0);
+
+            if (baseTypeHnd != nullptr)
             {
-                JITDUMP("  Unknown SIMD Type\n");
+                CorInfoType type = info.compCompHnd->getTypeForPrimitiveNumericClass(baseTypeHnd);
+
+                JITDUMP("HW Intrinsic SIMD Candidate Type %s with Base Type %s\n", className,
+                        getClassNameFromMetadata(baseTypeHnd, nullptr));
+
+                if (strcmp(className, "Vector256`1") == 0)
+                {
+                    size = YMM_REGSIZE_BYTES;
+                    switch (type)
+                    {
+                        case CORINFO_TYPE_FLOAT:
+                            Vector256FloatHandle = typeHnd;
+                            simdBaseType         = TYP_FLOAT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<float>\n");
+                            break;
+                        case CORINFO_TYPE_DOUBLE:
+                            Vector256DoubleHandle = typeHnd;
+                            simdBaseType          = TYP_DOUBLE;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<double>\n");
+                            break;
+                        case CORINFO_TYPE_INT:
+                            Vector256IntHandle = typeHnd;
+                            simdBaseType       = TYP_INT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<int>\n");
+                            break;
+                        case CORINFO_TYPE_UINT:
+                            Vector256UIntHandle = typeHnd;
+                            simdBaseType        = TYP_UINT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<uint>\n");
+                            break;
+                        case CORINFO_TYPE_SHORT:
+                            Vector256ShortHandle = typeHnd;
+                            simdBaseType         = TYP_SHORT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<short>\n");
+                            break;
+                        case CORINFO_TYPE_USHORT:
+                            Vector256UShortHandle = typeHnd;
+                            simdBaseType          = TYP_CHAR; // TODO TYP_USHORT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<ushort>\n");
+                            break;
+                        case CORINFO_TYPE_LONG:
+                            Vector256LongHandle = typeHnd;
+                            simdBaseType        = TYP_LONG;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<long>\n");
+                            break;
+                        case CORINFO_TYPE_ULONG:
+                            Vector256ULongHandle = typeHnd;
+                            simdBaseType         = TYP_ULONG;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<ulong>\n");
+                            break;
+                        case CORINFO_TYPE_UBYTE:
+                            Vector256UByteHandle = typeHnd;
+                            simdBaseType         = TYP_UBYTE;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<byte>\n");
+                            break;
+                        case CORINFO_TYPE_BYTE:
+                            Vector256ByteHandle = typeHnd;
+                            simdBaseType        = TYP_BYTE;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector256<sbyte>\n");
+                            break;
+
+                        default:
+                            JITDUMP("  Unknown Hardware Intrinsic SIMD Type Vector256<T>\n");
+                    }
+                }
+                else if (strcmp(className, "Vector128`1") == 0)
+                {
+                    size = XMM_REGSIZE_BYTES;
+                    switch (type)
+                    {
+                        case CORINFO_TYPE_FLOAT:
+                            Vector128FloatHandle = typeHnd;
+                            simdBaseType         = TYP_FLOAT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<float>\n");
+                            break;
+                        case CORINFO_TYPE_DOUBLE:
+                            Vector128DoubleHandle = typeHnd;
+                            simdBaseType          = TYP_DOUBLE;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<double>\n");
+                            break;
+                        case CORINFO_TYPE_INT:
+                            Vector128IntHandle = typeHnd;
+                            simdBaseType       = TYP_INT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<int>\n");
+                            break;
+                        case CORINFO_TYPE_UINT:
+                            Vector128UIntHandle = typeHnd;
+                            simdBaseType        = TYP_UINT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<uint>\n");
+                            break;
+                        case CORINFO_TYPE_SHORT:
+                            Vector128ShortHandle = typeHnd;
+                            simdBaseType         = TYP_SHORT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<short>\n");
+                            break;
+                        case CORINFO_TYPE_USHORT:
+                            Vector128UShortHandle = typeHnd;
+                            simdBaseType          = TYP_CHAR; // TODO TYP_USHORT;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<ushort>\n");
+                            break;
+                        case CORINFO_TYPE_LONG:
+                            Vector128LongHandle = typeHnd;
+                            simdBaseType        = TYP_LONG;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<long>\n");
+                            break;
+                        case CORINFO_TYPE_ULONG:
+                            Vector128ULongHandle = typeHnd;
+                            simdBaseType         = TYP_ULONG;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<ulong>\n");
+                            break;
+                        case CORINFO_TYPE_UBYTE:
+                            Vector128UByteHandle = typeHnd;
+                            simdBaseType         = TYP_UBYTE;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<byte>\n");
+                            break;
+                        case CORINFO_TYPE_BYTE:
+                            Vector128ByteHandle = typeHnd;
+                            simdBaseType        = TYP_BYTE;
+                            JITDUMP("  Found type Hardware Intrinsic SIMD Vector128<sbyte>\n");
+                            break;
+
+                        default:
+                            JITDUMP("  Unknown Hardware Intrinsic SIMD Type Vector128<T>\n");
+                    }
+                }
             }
         }
-    }
 
-    if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
-    {
-        // If not a fixed size vector then its size is same as SIMD vector
-        // register length in bytes
-        if (size == 0)
+        if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
         {
-            size = getSIMDVectorRegisterByteLength();
+            *sizeBytes = size;
+            setUsesSIMDTypes(true);
         }
-
-        *sizeBytes = size;
-        setUsesSIMDTypes(true);
     }
+#endif // FEATURE_HW_INTRINSICS
 
     return simdBaseType;
 }
index 349700a..6f99063 100644 (file)
@@ -4503,6 +4503,49 @@ CorInfoType CEEInfo::getTypeForPrimitiveValueClass(
     return result;
 }
 
+/*********************************************************************/
+CorInfoType CEEInfo::getTypeForPrimitiveNumericClass(
+        CORINFO_CLASS_HANDLE clsHnd)
+{
+    CONTRACTL {
+        SO_TOLERANT;
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+    // Returns the CorInfoType of clsHnd when its signature element type is one of the numeric primitives listed below.
+    CorInfoType result = CORINFO_TYPE_UNDEF; // returned unchanged for every element type outside that list
+
+    JIT_TO_EE_TRANSITION_LEAF();
+
+    TypeHandle th(clsHnd);
+    _ASSERTE (!th.IsGenericVariable()); // callers are expected to pass a concrete type, not a generic variable
+
+    CorElementType ty = th.GetSignatureCorElementType();
+    switch (ty)
+    {
+        case ELEMENT_TYPE_I1: // the ten cases below (I1..U8, R4, R8) deliberately fall through to one mapping
+        case ELEMENT_TYPE_U1:
+        case ELEMENT_TYPE_I2:
+        case ELEMENT_TYPE_U2:
+        case ELEMENT_TYPE_I4:
+        case ELEMENT_TYPE_U4:
+        case ELEMENT_TYPE_I8:
+        case ELEMENT_TYPE_U8:
+        case ELEMENT_TYPE_R4:
+        case ELEMENT_TYPE_R8:
+            result = asCorInfoType(ty); // shared conversion for all accepted element types
+            break;
+
+        default: // any element type not listed above keeps CORINFO_TYPE_UNDEF
+            break;
+    }
+    // NOTE(review): the entry transition macro is repeated on exit; confirm whether an EE-to-JIT counterpart was intended.
+    JIT_TO_EE_TRANSITION_LEAF();
+
+    return result;
+}
+
 
 void CEEInfo::getGSCookie(GSCookie * pCookieVal, GSCookie ** ppCookieVal)
 {
index 71872d3..74397c8 100644 (file)
@@ -551,6 +551,12 @@ public:
             CORINFO_CLASS_HANDLE        cls
             );
 
+    // "System.Int32" ==> CORINFO_TYPE_INT..
+    // "System.UInt32" ==> CORINFO_TYPE_UINT..
+    CorInfoType getTypeForPrimitiveNumericClass(
+            CORINFO_CLASS_HANDLE        cls
+            );
+
     // TRUE if child is a subtype of parent
     // if parent is an interface, then does child implement / extend parent
     BOOL canCast(
diff --git a/tests/src/JIT/HardwareIntrinsics/Add.cs b/tests/src/JIT/HardwareIntrinsics/Add.cs
new file mode 100644 (file)
index 0000000..cf39254
--- /dev/null
@@ -0,0 +1,428 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+    class Program
+    {
+        const int Pass = 100;
+        const int Fail = 0;
+
+        // Exercises the vector Add intrinsics for every element type on each ISA the machine
+        // reports as supported (AVX, AVX2, SSE/SSE2). For each case the two pinned input arrays
+        // are read as one vector each, added with the intrinsic, written to the pinned output
+        // array and compared element-wise against the scalar x + y.
+        // Returns Pass (100) when every executed case matches, Fail (0) otherwise; an ISA whose
+        // IsSupported is false is skipped and does not affect the result.
+        static unsafe int Main(string[] args)
+        {
+            int testResult = Pass;
+
+            if (Avx.IsSupported)
+            {
+                using (TestTable<float> floatTable = new TestTable<float>(new float[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new float[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new float[8]))
+                using (TestTable<double> doubleTable = new TestTable<double>(new double[4] { 1, -5, 100, 0 }, new double[4] { 22, -1, -50, 0 }, new double[4]))
+                {
+                    var vf1 = Unsafe.Read<Vector256<float>>(floatTable.inArray1Ptr);
+                    var vf2 = Unsafe.Read<Vector256<float>>(floatTable.inArray2Ptr);
+                    var vf3 = Avx.Add(vf1, vf2);
+                    Unsafe.Write(floatTable.outArrayPtr, vf3);
+
+                    var vd1 = Unsafe.Read<Vector256<double>>(doubleTable.inArray1Ptr);
+                    var vd2 = Unsafe.Read<Vector256<double>>(doubleTable.inArray2Ptr);
+                    var vd3 = Avx.Add(vd1, vd2);
+                    Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+                    if (!floatTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX Add failed on float:", floatTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!doubleTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX Add failed on double:", doubleTable.outArray);
+                        testResult = Fail;
+                    }
+                }
+            }
+
+            if (Avx2.IsSupported)
+            {
+                using (TestTable<int> intTable = new TestTable<int>(new int[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new int[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new int[8]))
+                using (TestTable<long> longTable = new TestTable<long>(new long[4] { 1, -5, 100, 0 }, new long[4] { 22, -1, -50, 0 }, new long[4]))
+                using (TestTable<uint> uintTable = new TestTable<uint>(new uint[8] { 1, 5, 100, 0, 1, 5, 100, 0 }, new uint[8] { 22, 1, 50, 0, 22, 1, 50, 0 }, new uint[8]))
+                using (TestTable<ulong> ulongTable = new TestTable<ulong>(new ulong[4] { 1, 5, 100, 0 }, new ulong[4] { 22, 1, 50, 0 }, new ulong[4]))
+                using (TestTable<short> shortTable = new TestTable<short>(new short[16] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new short[16] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new short[16]))
+                using (TestTable<ushort> ushortTable = new TestTable<ushort>(new ushort[16] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new ushort[16] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new ushort[16]))
+                using (TestTable<sbyte> sbyteTable = new TestTable<sbyte>(new sbyte[32] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new sbyte[32] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new sbyte[32]))
+                using (TestTable<byte> byteTable = new TestTable<byte>(new byte[32] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new byte[32] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new byte[32]))
+                {
+                    var vi1 = Unsafe.Read<Vector256<int>>(intTable.inArray1Ptr);
+                    var vi2 = Unsafe.Read<Vector256<int>>(intTable.inArray2Ptr);
+                    var vi3 = Avx2.Add(vi1, vi2);
+                    Unsafe.Write(intTable.outArrayPtr, vi3);
+
+                    var vl1 = Unsafe.Read<Vector256<long>>(longTable.inArray1Ptr);
+                    var vl2 = Unsafe.Read<Vector256<long>>(longTable.inArray2Ptr);
+                    var vl3 = Avx2.Add(vl1, vl2);
+                    Unsafe.Write(longTable.outArrayPtr, vl3);
+
+                    var vui1 = Unsafe.Read<Vector256<uint>>(uintTable.inArray1Ptr);
+                    var vui2 = Unsafe.Read<Vector256<uint>>(uintTable.inArray2Ptr);
+                    var vui3 = Avx2.Add(vui1, vui2);
+                    Unsafe.Write(uintTable.outArrayPtr, vui3);
+
+                    var vul1 = Unsafe.Read<Vector256<ulong>>(ulongTable.inArray1Ptr);
+                    var vul2 = Unsafe.Read<Vector256<ulong>>(ulongTable.inArray2Ptr);
+                    var vul3 = Avx2.Add(vul1, vul2);
+                    Unsafe.Write(ulongTable.outArrayPtr, vul3);
+
+                    var vs1 = Unsafe.Read<Vector256<short>>(shortTable.inArray1Ptr);
+                    var vs2 = Unsafe.Read<Vector256<short>>(shortTable.inArray2Ptr);
+                    var vs3 = Avx2.Add(vs1, vs2);
+                    Unsafe.Write(shortTable.outArrayPtr, vs3);
+
+                    var vus1 = Unsafe.Read<Vector256<ushort>>(ushortTable.inArray1Ptr);
+                    var vus2 = Unsafe.Read<Vector256<ushort>>(ushortTable.inArray2Ptr);
+                    var vus3 = Avx2.Add(vus1, vus2);
+                    Unsafe.Write(ushortTable.outArrayPtr, vus3);
+
+                    var vsb1 = Unsafe.Read<Vector256<sbyte>>(sbyteTable.inArray1Ptr);
+                    var vsb2 = Unsafe.Read<Vector256<sbyte>>(sbyteTable.inArray2Ptr);
+                    var vsb3 = Avx2.Add(vsb1, vsb2);
+                    Unsafe.Write(sbyteTable.outArrayPtr, vsb3);
+
+                    var vb1 = Unsafe.Read<Vector256<byte>>(byteTable.inArray1Ptr);
+                    var vb2 = Unsafe.Read<Vector256<byte>>(byteTable.inArray2Ptr);
+                    var vb3 = Avx2.Add(vb1, vb2);
+                    Unsafe.Write(byteTable.outArrayPtr, vb3);
+
+                    if (!intTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX2 Add failed on int:", intTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!longTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX2 Add failed on long:", longTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!uintTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX2 Add failed on uint:", uintTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!ulongTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX2 Add failed on ulong:", ulongTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!shortTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX2 Add failed on short:", shortTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!ushortTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX2 Add failed on ushort:", ushortTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!sbyteTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX2 Add failed on sbyte:", sbyteTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!byteTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("AVX2 Add failed on byte:", byteTable.outArray);
+                        testResult = Fail;
+                    }
+                }
+            }
+
+            if (Sse2.IsSupported)
+            {
+                using (TestTable<float> floatTable = new TestTable<float>(new float[4] { 1, -5, 100, 0 }, new float[4] { 22, -1, -50, 0 }, new float[4]))
+                using (TestTable<double> doubleTable = new TestTable<double>(new double[2] { 1, -5 }, new double[2] { 22, -1 }, new double[2]))
+                using (TestTable<int> intTable = new TestTable<int>(new int[4] { 1, -5, 100, 0 }, new int[4] { 22, -1, -50, 0 }, new int[4]))
+                using (TestTable<long> longTable = new TestTable<long>(new long[2] { 1, -5 }, new long[2] { 22, -1 }, new long[2]))
+                using (TestTable<uint> uintTable = new TestTable<uint>(new uint[4] { 1, 5, 100, 0 }, new uint[4] { 22, 1, 50, 0 }, new uint[4]))
+                using (TestTable<ulong> ulongTable = new TestTable<ulong>(new ulong[2] { 1, 5 }, new ulong[2] { 22, 1 }, new ulong[2]))
+                using (TestTable<short> shortTable = new TestTable<short>(new short[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new short[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new short[8]))
+                using (TestTable<ushort> ushortTable = new TestTable<ushort>(new ushort[8] { 1, 5, 100, 0, 1, 5, 100, 0 }, new ushort[8] { 22, 1, 50, 0, 22, 1, 50, 0 }, new ushort[8]))
+                using (TestTable<sbyte> sbyteTable = new TestTable<sbyte>(new sbyte[16] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new sbyte[16] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new sbyte[16]))
+                using (TestTable<byte> byteTable = new TestTable<byte>(new byte[16] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new byte[16] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new byte[16]))
+                {
+                    // NOTE(review): the float case calls Sse.Add but is gated on Sse2.IsSupported like
+                    // the rest of this block; presumably SSE2 support implies SSE - confirm.
+                    var vf1 = Unsafe.Read<Vector128<float>>(floatTable.inArray1Ptr);
+                    var vf2 = Unsafe.Read<Vector128<float>>(floatTable.inArray2Ptr);
+                    var vf3 = Sse.Add(vf1, vf2);
+                    Unsafe.Write(floatTable.outArrayPtr, vf3);
+
+                    var vd1 = Unsafe.Read<Vector128<double>>(doubleTable.inArray1Ptr);
+                    var vd2 = Unsafe.Read<Vector128<double>>(doubleTable.inArray2Ptr);
+                    var vd3 = Sse2.Add(vd1, vd2);
+                    Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+                    var vi1 = Unsafe.Read<Vector128<int>>(intTable.inArray1Ptr);
+                    var vi2 = Unsafe.Read<Vector128<int>>(intTable.inArray2Ptr);
+                    var vi3 = Sse2.Add(vi1, vi2);
+                    Unsafe.Write(intTable.outArrayPtr, vi3);
+
+                    var vl1 = Unsafe.Read<Vector128<long>>(longTable.inArray1Ptr);
+                    var vl2 = Unsafe.Read<Vector128<long>>(longTable.inArray2Ptr);
+                    var vl3 = Sse2.Add(vl1, vl2);
+                    Unsafe.Write(longTable.outArrayPtr, vl3);
+
+                    var vui1 = Unsafe.Read<Vector128<uint>>(uintTable.inArray1Ptr);
+                    var vui2 = Unsafe.Read<Vector128<uint>>(uintTable.inArray2Ptr);
+                    var vui3 = Sse2.Add(vui1, vui2);
+                    Unsafe.Write(uintTable.outArrayPtr, vui3);
+
+                    var vul1 = Unsafe.Read<Vector128<ulong>>(ulongTable.inArray1Ptr);
+                    var vul2 = Unsafe.Read<Vector128<ulong>>(ulongTable.inArray2Ptr);
+                    var vul3 = Sse2.Add(vul1, vul2);
+                    Unsafe.Write(ulongTable.outArrayPtr, vul3);
+
+                    var vs1 = Unsafe.Read<Vector128<short>>(shortTable.inArray1Ptr);
+                    var vs2 = Unsafe.Read<Vector128<short>>(shortTable.inArray2Ptr);
+                    var vs3 = Sse2.Add(vs1, vs2);
+                    Unsafe.Write(shortTable.outArrayPtr, vs3);
+
+                    var vus1 = Unsafe.Read<Vector128<ushort>>(ushortTable.inArray1Ptr);
+                    var vus2 = Unsafe.Read<Vector128<ushort>>(ushortTable.inArray2Ptr);
+                    var vus3 = Sse2.Add(vus1, vus2);
+                    Unsafe.Write(ushortTable.outArrayPtr, vus3);
+
+                    var vsb1 = Unsafe.Read<Vector128<sbyte>>(sbyteTable.inArray1Ptr);
+                    var vsb2 = Unsafe.Read<Vector128<sbyte>>(sbyteTable.inArray2Ptr);
+                    var vsb3 = Sse2.Add(vsb1, vsb2);
+                    Unsafe.Write(sbyteTable.outArrayPtr, vsb3);
+
+                    var vb1 = Unsafe.Read<Vector128<byte>>(byteTable.inArray1Ptr);
+                    var vb2 = Unsafe.Read<Vector128<byte>>(byteTable.inArray2Ptr);
+                    var vb3 = Sse2.Add(vb1, vb2);
+                    Unsafe.Write(byteTable.outArrayPtr, vb3);
+
+                    if (!intTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on int:", intTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!longTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on long:", longTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!uintTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on uint:", uintTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!ulongTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on ulong:", ulongTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!shortTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on short:", shortTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!ushortTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on ushort:", ushortTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!floatTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE Add failed on float:", floatTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!doubleTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on double:", doubleTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!sbyteTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on sbyte:", sbyteTable.outArray);
+                        testResult = Fail;
+                    }
+
+                    if (!byteTable.CheckResult((x, y, z) => x + y == z))
+                    {
+                        ReportFailure("SSE2 Add failed on byte:", byteTable.outArray);
+                        testResult = Fail;
+                    }
+                }
+            }
+
+            return testResult;
+        }
+
+        // Prints the failure header on its own line, then every element of outArray separated
+        // by ", " on the following line, so the actual intrinsic output is visible in the log.
+        private static void ReportFailure<T>(string header, T[] outArray)
+        {
+            Console.WriteLine(header);
+            foreach (var item in outArray)
+            {
+                Console.Write(item + ", ");
+            }
+            Console.WriteLine();
+        }
+
+        // Holds two input arrays and one output array, pinned for the lifetime of the table so
+        // their addresses can be handed to Unsafe.Read / Unsafe.Write. Dispose releases the pins.
+        public unsafe struct TestTable<T> : IDisposable where T : struct
+        {
+            public T[] inArray1;
+            public T[] inArray2;
+            public T[] outArray;
+
+            // Raw addresses of the pinned arrays; only meaningful until Dispose frees the handles.
+            public void* inArray1Ptr => inHandle1.AddrOfPinnedObject().ToPointer();
+            public void* inArray2Ptr => inHandle2.AddrOfPinnedObject().ToPointer();
+            public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer();
+
+            GCHandle inHandle1;
+            GCHandle inHandle2;
+            GCHandle outHandle;
+
+            // Stores the three arrays (a, b: inputs; c: output) and pins each of them.
+            public TestTable(T[] a, T[] b, T[] c)
+            {
+                this.inArray1 = a;
+                this.inArray2 = b;
+                this.outArray = c;
+
+                inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned);
+                inHandle2 = GCHandle.Alloc(inArray2, GCHandleType.Pinned);
+                outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned);
+            }
+
+            // Returns true when check(inArray1[i], inArray2[i], outArray[i]) holds for every index
+            // of inArray1; stops at and returns false on the first element that fails.
+            public bool CheckResult(Func<T, T, T, bool> check)
+            {
+                for (int i = 0; i < inArray1.Length; i++)
+                {
+                    if (!check(inArray1[i], inArray2[i], outArray[i]))
+                    {
+                        return false;
+                    }
+                }
+                return true;
+            }
+
+            // Unpins the arrays. Each handle is freed only if it is still allocated, so calling
+            // Dispose again on the same instance is a no-op instead of throwing from GCHandle.Free.
+            public void Dispose()
+            {
+                if (inHandle1.IsAllocated)
+                {
+                    inHandle1.Free();
+                }
+                if (inHandle2.IsAllocated)
+                {
+                    inHandle2.Free();
+                }
+                if (outHandle.IsAllocated)
+                {
+                    outHandle.Free();
+                }
+            }
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/Add_r.csproj b/tests/src/JIT/HardwareIntrinsics/Add_r.csproj
new file mode 100644 (file)
index 0000000..6179540
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- Builds Add.cs as an executable; Optimize is left empty in this variant -->
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> <!-- NOTE(review): same GUID as Add_ro.csproj - confirm this is intentional -->
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- Add.cs declares an unsafe Main and uses raw pointers -->
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize></Optimize> <!-- left empty here; Add_ro.csproj sets this to True -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Add.cs" /> <!-- the only source file; Add_ro.csproj compiles the same file -->
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/Add_ro.csproj b/tests/src/JIT/HardwareIntrinsics/Add_ro.csproj
new file mode 100644 (file)
index 0000000..7c5ee7c
--- /dev/null
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <!-- Builds Add.cs as an executable with Optimize set to True -->
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid> <!-- NOTE(review): same GUID as Add_r.csproj - confirm this is intentional -->
+    <OutputType>Exe</OutputType>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks> <!-- Add.cs declares an unsafe Main and uses raw pointers -->
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize> <!-- the only setting that differs from Add_r.csproj, which leaves it empty -->
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Add.cs" /> <!-- the only source file; Add_r.csproj compiles the same file -->
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
\ No newline at end of file