#define SELECTANY extern __declspec(selectany)
#endif
-SELECTANY const GUID JITEEVersionIdentifier = { /* 01c3d216-a404-4290-8278-ac27a4793d31 */
- 0x01c3d216,
- 0xa404,
- 0x4290,
- {0x82, 0x78, 0xac, 0x27, 0xa4, 0x79, 0x3d, 0x31}
+SELECTANY const GUID JITEEVersionIdentifier = { /* 19258069-1777-4691-87DF-DADF8F352875 */
+ 0x19258069,
+ 0x1777,
+ 0x4691,
+ { 0x87, 0xdf, 0xda, 0xdf, 0x8f, 0x35, 0x28, 0x75 }
};
+
//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// END JITEEVersionIdentifier
CORINFO_CLASS_HANDLE cls
) = 0;
+ // "System.Int32" ==> CORINFO_TYPE_INT..
+ // "System.UInt32" ==> CORINFO_TYPE_UINT..
+ virtual CorInfoType getTypeForPrimitiveNumericClass(
+ CORINFO_CLASS_HANDLE cls
+ ) = 0;
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
virtual BOOL canCast(
#endif // _TARGET_X86_
#endif // FEATURE_SIMD
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
void genHWIntrinsic(GenTreeHWIntrinsic* node);
void genSSEIntrinsic(GenTreeHWIntrinsic* node);
void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
SIMDVector3Handle = nullptr;
SIMDVector4Handle = nullptr;
SIMDVectorHandle = nullptr;
-#endif
+#if FEATURE_HW_INTRINSICS
+ // Reset every cached Vector128<T> / Vector256<T> class handle. The ULong
+ // handles are included because gtGetStructHandleForHWSIMD returns them for
+ // TYP_ULONG; leaving them out would expose uninitialized values.
+ Vector128FloatHandle = nullptr;
+ Vector128DoubleHandle = nullptr;
+ Vector128IntHandle = nullptr;
+ Vector128UShortHandle = nullptr;
+ Vector128UByteHandle = nullptr;
+ Vector128ShortHandle = nullptr;
+ Vector128ByteHandle = nullptr;
+ Vector128LongHandle = nullptr;
+ Vector128UIntHandle = nullptr;
+ Vector128ULongHandle = nullptr;
+ Vector256FloatHandle = nullptr;
+ Vector256DoubleHandle = nullptr;
+ Vector256IntHandle = nullptr;
+ Vector256UShortHandle = nullptr;
+ Vector256UByteHandle = nullptr;
+ Vector256ShortHandle = nullptr;
+ Vector256ByteHandle = nullptr;
+ Vector256LongHandle = nullptr;
+ Vector256UIntHandle = nullptr;
+ Vector256ULongHandle = nullptr;
+#endif // FEATURE_HW_INTRINSICS
+#endif // FEATURE_SIMD
compUsesThrowHelper = false;
}
GenTree* op2,
NamedIntrinsic hwIntrinsicID);
GenTree* gtNewMustThrowException(unsigned helper, var_types type);
+ CORINFO_CLASS_HANDLE gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType);
#endif // FEATURE_HW_INTRINSICS
GenTreePtr gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
InstructionSet lookupHWIntrinsicISA(const char* className);
NamedIntrinsic lookupHWIntrinsic(const char* methodName, InstructionSet isa);
InstructionSet isaOfHWIntrinsic(NamedIntrinsic intrinsic);
+ bool isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic);
+#ifdef _TARGET_XARCH_
GenTree* impX86HWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impLZCNTIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impPCLMULQDQIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impPOPCNTIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
+#endif // _TARGET_XARCH_
#endif // FEATURE_HW_INTRINSICS
GenTreePtr impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
CORINFO_SIG_INFO* sig,
CORINFO_CLASS_HANDLE SIMDVector4Handle;
CORINFO_CLASS_HANDLE SIMDVectorHandle;
+#if FEATURE_HW_INTRINSICS
+ CORINFO_CLASS_HANDLE Vector128FloatHandle;
+ CORINFO_CLASS_HANDLE Vector128DoubleHandle;
+ CORINFO_CLASS_HANDLE Vector128IntHandle;
+ CORINFO_CLASS_HANDLE Vector128UShortHandle;
+ CORINFO_CLASS_HANDLE Vector128UByteHandle;
+ CORINFO_CLASS_HANDLE Vector128ShortHandle;
+ CORINFO_CLASS_HANDLE Vector128ByteHandle;
+ CORINFO_CLASS_HANDLE Vector128LongHandle;
+ CORINFO_CLASS_HANDLE Vector128UIntHandle;
+ CORINFO_CLASS_HANDLE Vector128ULongHandle;
+ CORINFO_CLASS_HANDLE Vector256FloatHandle;
+ CORINFO_CLASS_HANDLE Vector256DoubleHandle;
+ CORINFO_CLASS_HANDLE Vector256IntHandle;
+ CORINFO_CLASS_HANDLE Vector256UShortHandle;
+ CORINFO_CLASS_HANDLE Vector256UByteHandle;
+ CORINFO_CLASS_HANDLE Vector256ShortHandle;
+ CORINFO_CLASS_HANDLE Vector256ByteHandle;
+ CORINFO_CLASS_HANDLE Vector256LongHandle;
+ CORINFO_CLASS_HANDLE Vector256UIntHandle;
+ CORINFO_CLASS_HANDLE Vector256ULongHandle;
+#endif
+
// Get the handle for a SIMD type.
CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, var_types simdBaseType)
{
emitAdjustStackDepthPushPop(ins);
}
+#if FEATURE_HW_INTRINSICS
+//------------------------------------------------------------------------
+// emitIns_SIMD_R_R_R: emit a SIMD instruction with a destination register and
+//                     two source registers.
+//
+// Arguments:
+//    ins      -- the instruction to emit
+//    reg      -- the destination register
+//    reg1     -- the first source register
+//    reg2     -- the second source register
+//    simdtype -- the SIMD type; emitTypeSize(simdtype) supplies the operand size
+//
+void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype)
+{
+ // With VEX encoding and a destination that differs from the first source,
+ // a single three-register instruction is emitted.
+ if (UseVEXEncoding() && reg1 != reg)
+ {
+ emitIns_R_R_R(ins, emitTypeSize(simdtype), reg, reg1, reg2);
+ }
+ else
+ {
+ // Two-register form: copy reg1 into reg when they differ, then apply ins
+ // to reg and reg2.
+ // NOTE(review): if reg2 == reg while reg1 != reg, the movaps below overwrites
+ // reg2 before it is read -- confirm callers / register allocation rule this out.
+ if (reg1 != reg)
+ {
+ emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
+ }
+ emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg2);
+ }
+}
+#endif
+
/*****************************************************************************
*
* The following add instructions referencing stack-based local variables.
void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
+#if FEATURE_HW_INTRINSICS
+void emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype);
+#endif
+
#if FEATURE_STACK_FP_X87
void emitIns_F_F0(instruction ins, unsigned fpreg);
#ifdef FEATURE_SIMD
case GT_SIMD:
structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsSIMD()->gtSIMDBaseType);
+ break;
#endif // FEATURE_SIMD
+#if FEATURE_HW_INTRINSICS
+ case GT_HWIntrinsic:
+ structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
+ break;
+#endif
break;
}
}
return OperIsSIMD(gtOper);
}
+#if FEATURE_HW_INTRINSICS
+ inline bool OperIsSimdHWIntrinsic() const;
+#else
+ inline bool OperIsSimdHWIntrinsic() const
+ {
+ return false;
+ }
+#endif
+
// This is here for cleaner GT_LONG #ifdefs.
static bool OperIsLong(genTreeOps gtOper)
{
{
}
- bool isSIMD()
+ bool isSIMD() const
{
return gtSIMDSize != 0;
}
}
#endif
};
+
+//------------------------------------------------------------------------
+// OperIsSimdHWIntrinsic: report whether this node is a GT_HWIntrinsic node
+//                        for which isSIMD() is true.
+//
+// Return Value:
+//    The result of isSIMD() when gtOper is GT_HWIntrinsic; false for any
+//    other oper.
+//
+inline bool GenTree::OperIsSimdHWIntrinsic() const
+{
+ if (gtOper == GT_HWIntrinsic)
+ {
+ // We cannot use AsHWIntrinsic() as it is not declared const
+ const GenTreeHWIntrinsic* hwIntrinsic = reinterpret_cast<const GenTreeHWIntrinsic*>(this);
+ return hwIntrinsic->isSIMD();
+ }
+ return false;
+}
#endif // FEATURE_HW_INTRINSICS
/* gtIndex -- array access */
void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement SSE intrinsic code generation");
+ // Generates code for an SSE hardware intrinsic node. NI_SSE_Add is the only
+ // intrinsic handled here; any other id reaches unreached().
+ NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+ regNumber targetReg = node->gtRegNum;
+ var_types targetType = node->TypeGet();
+ var_types baseType = node->gtSIMDBaseType;
+
+ regNumber op1Reg = op1->gtRegNum;
+ // op2's register is read only inside the cases that use a second operand.
+ regNumber op2Reg = REG_NA;
+ emitter* emit = getEmitter();
+
+ genConsumeOperands(node);
+
+ switch (intrinsicID)
+ {
+ case NI_SSE_Add:
+ // SSE Add is asserted to be float-only and is emitted as addps on a 16-byte vector.
+ assert(baseType == TYP_FLOAT);
+ op2Reg = op2->gtRegNum;
+ emit->emitIns_SIMD_R_R_R(INS_addps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
+ break;
+ default:
+ unreached();
+ break;
+ }
+ genProduceReg(node);
}
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement SSE2 intrinsic code generation");
+ // Generates code for an SSE2 hardware intrinsic node. NI_SSE2_Add is the only
+ // intrinsic handled here; any other id reaches unreached().
+ NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+ regNumber targetReg = node->gtRegNum;
+ var_types targetType = node->TypeGet();
+ var_types baseType = node->gtSIMDBaseType;
+
+ regNumber op1Reg = op1->gtRegNum;
+ // op2's register is read only inside the cases that use a second operand.
+ regNumber op2Reg = REG_NA;
+ emitter* emit = getEmitter();
+
+ genConsumeOperands(node);
+
+ switch (intrinsicID)
+ {
+ case NI_SSE2_Add:
+ {
+ op2Reg = op2->gtRegNum;
+
+ // Select the add instruction from the vector's base type; signed and
+ // unsigned integers of the same width share one instruction.
+ instruction ins;
+ switch (baseType)
+ {
+ case TYP_DOUBLE:
+ ins = INS_addpd;
+ break;
+ case TYP_INT:
+ case TYP_UINT:
+ ins = INS_paddd;
+ break;
+ case TYP_LONG:
+ case TYP_ULONG:
+ ins = INS_paddq;
+ break;
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ ins = INS_paddb;
+ break;
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ ins = INS_paddw;
+ break;
+ default:
+ unreached();
+ break;
+ }
+
+ emit->emitIns_SIMD_R_R_R(ins, targetReg, op1Reg, op2Reg, TYP_SIMD16);
+ break;
+ }
+ default:
+ unreached();
+ break;
+ }
+ genProduceReg(node);
}
void CodeGen::genSSE3Intrinsic(GenTreeHWIntrinsic* node)
void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement AVX intrinsic code generation");
+ // Generates code for an AVX hardware intrinsic node. NI_AVX_Add is the only
+ // intrinsic handled here; any other id reaches unreached().
+ NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+ regNumber targetReg = node->gtRegNum;
+ var_types targetType = node->TypeGet();
+ var_types baseType = node->gtSIMDBaseType;
+
+ regNumber op1Reg = op1->gtRegNum;
+ // op2's register is read only inside the cases that use a second operand.
+ regNumber op2Reg = REG_NA;
+
+ genConsumeOperands(node);
+
+ emitter* emit = getEmitter();
+ switch (intrinsicID)
+ {
+ case NI_AVX_Add:
+ {
+ op2Reg = op2->gtRegNum;
+
+ // AVX Add handles only the floating-point base types.
+ instruction ins;
+ switch (baseType)
+ {
+ case TYP_DOUBLE:
+ ins = INS_addpd;
+ break;
+ case TYP_FLOAT:
+ ins = INS_addps;
+ break;
+ default:
+ unreached();
+ break;
+ }
+
+ // The three-register form is emitted directly with the TYP_SIMD32 operand size.
+ emit->emitIns_R_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op2Reg);
+ break;
+ }
+ default:
+ unreached();
+ break;
+ }
+ genProduceReg(node);
}
void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement AVX2 intrinsic code generation");
+ // Generates code for an AVX2 hardware intrinsic node. NI_AVX2_Add is the only
+ // intrinsic handled here; any other id reaches unreached().
+ NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+ regNumber targetReg = node->gtRegNum;
+ var_types targetType = node->TypeGet();
+ var_types baseType = node->gtSIMDBaseType;
+
+ regNumber op1Reg = op1->gtRegNum;
+ // op2's register is read only inside the cases that use a second operand.
+ regNumber op2Reg = REG_NA;
+
+ genConsumeOperands(node);
+
+ emitter* emit = getEmitter();
+ switch (intrinsicID)
+ {
+ case NI_AVX2_Add:
+ {
+ op2Reg = op2->gtRegNum;
+
+ // AVX2 Add handles only the integer base types; signed and unsigned
+ // integers of the same width share one instruction.
+ instruction ins;
+ switch (baseType)
+ {
+ case TYP_INT:
+ case TYP_UINT:
+ ins = INS_paddd;
+ break;
+ case TYP_LONG:
+ case TYP_ULONG:
+ ins = INS_paddq;
+ break;
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ ins = INS_paddb;
+ break;
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ ins = INS_paddw;
+ break;
+ default:
+ unreached();
+ break;
+ }
+
+ // The three-register form is emitted directly with the TYP_SIMD32 operand size.
+ emit->emitIns_R_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op2Reg);
+ break;
+ }
+ default:
+ unreached();
+ break;
+ }
+ genProduceReg(node);
}
void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node)
// Intrinsic ID Function name ISA
// SSE Intrinsics
HARDWARE_INTRINSIC(SSE_IsSupported, "get_IsSupported", SSE)
+HARDWARE_INTRINSIC(SSE_Add, "Add", SSE)
// SSE2 Intrinsics
HARDWARE_INTRINSIC(SSE2_IsSupported, "get_IsSupported", SSE2)
+HARDWARE_INTRINSIC(SSE2_Add, "Add", SSE2)
// SSE3 Intrinsics
HARDWARE_INTRINSIC(SSE3_IsSupported, "get_IsSupported", SSE3)
// AVX Intrinsics
HARDWARE_INTRINSIC(AVX_IsSupported, "get_IsSupported", AVX)
+HARDWARE_INTRINSIC(AVX_Add, "Add", AVX)
// AVX2 Intrinsics
HARDWARE_INTRINSIC(AVX2_IsSupported, "get_IsSupported", AVX2)
+HARDWARE_INTRINSIC(AVX2_Add, "Add", AVX2)
// AES Intrinsics
HARDWARE_INTRINSIC(AES_IsSupported, "get_IsSupported", AES)
}
//------------------------------------------------------------------------
+// isIntrinsicAnIsSupportedPropertyGetter: return true if the intrinsic is "get_IsSupported"
+//
+// Arguments:
+//    intrinsic -- id of the intrinsic function.
+//
+// Return Value:
+//    true if the intrinsic is one of the per-ISA "get_IsSupported" ids listed
+//    below, false otherwise.
+//
+// Notes:
+//    "get_IsSupported" sometimes needs special treatment; for example,
+//    impX86HWIntrinsic excludes these getters from its unsupported-intrinsic check.
+//
+bool Compiler::isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic)
+{
+ switch (intrinsic)
+ {
+ case NI_SSE_IsSupported:
+ case NI_SSE2_IsSupported:
+ case NI_SSE3_IsSupported:
+ case NI_SSSE3_IsSupported:
+ case NI_SSE41_IsSupported:
+ case NI_SSE42_IsSupported:
+ case NI_AVX_IsSupported:
+ case NI_AVX2_IsSupported:
+ case NI_AES_IsSupported:
+ case NI_BMI1_IsSupported:
+ case NI_BMI2_IsSupported:
+ case NI_FMA_IsSupported:
+ case NI_LZCNT_IsSupported:
+ case NI_PCLMULQDQ_IsSupported:
+ case NI_POPCNT_IsSupported:
+ return true;
+ default:
+ return false;
+ }
+}
+
+//------------------------------------------------------------------------
// impX86HWIntrinsic: dispatch hardware intrinsics to their own implementation
// function
//
GenTree* Compiler::impX86HWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
InstructionSet isa = isaOfHWIntrinsic(intrinsic);
- if (!compSupports(isa) && strcmp("get_IsSupported", getHWIntrinsicName(intrinsic)) != 0)
+ // Will throw PlatformNotSupportedException if
+ // - calling hardware intrinsics on unsupported hardware
+ // - calling SIMD hardware intrinsics with featureSIMD=false
+ if ((!compSupports(isa) || (!featureSIMD && isa != InstructionSet_BMI1 && isa != InstructionSet_BMI2 &&
+ isa != InstructionSet_LZCNT && isa != InstructionSet_POPCNT)) &&
+ !isIntrinsicAnIsSupportedPropertyGetter(intrinsic))
{
for (unsigned i = 0; i < sig->numArgs; i++)
{
}
}
+//------------------------------------------------------------------------
+// gtGetStructHandleForHWSIMD: look up the cached Vector128/Vector256 class
+//                             handle for a SIMD type and base type.
+//
+// Arguments:
+//    simdType     -- TYP_SIMD16 selects the Vector128 handles, TYP_SIMD32 the
+//                    Vector256 handles
+//    simdBaseType -- the element type of the vector
+//
+// Return Value:
+//    The cached handle for the matching combination (whatever value that
+//    field currently holds). NO_CLASS_HANDLE if simdType is neither
+//    TYP_SIMD16 nor TYP_SIMD32, or if the base type is not handled (which
+//    also trips the assert).
+//
+// Notes:
+//    TYP_CHAR shares the UShort handle with TYP_USHORT.
+//
+CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType)
+{
+ if (simdType == TYP_SIMD16)
+ {
+ switch (simdBaseType)
+ {
+ case TYP_FLOAT:
+ return Vector128FloatHandle;
+ case TYP_DOUBLE:
+ return Vector128DoubleHandle;
+ case TYP_INT:
+ return Vector128IntHandle;
+ case TYP_CHAR:
+ case TYP_USHORT:
+ return Vector128UShortHandle;
+ case TYP_UBYTE:
+ return Vector128UByteHandle;
+ case TYP_SHORT:
+ return Vector128ShortHandle;
+ case TYP_BYTE:
+ return Vector128ByteHandle;
+ case TYP_LONG:
+ return Vector128LongHandle;
+ case TYP_UINT:
+ return Vector128UIntHandle;
+ case TYP_ULONG:
+ return Vector128ULongHandle;
+ default:
+ assert(!"Didn't find a class handle for simdType");
+ }
+ }
+ else if (simdType == TYP_SIMD32)
+ {
+ switch (simdBaseType)
+ {
+ case TYP_FLOAT:
+ return Vector256FloatHandle;
+ case TYP_DOUBLE:
+ return Vector256DoubleHandle;
+ case TYP_INT:
+ return Vector256IntHandle;
+ case TYP_CHAR:
+ case TYP_USHORT:
+ return Vector256UShortHandle;
+ case TYP_UBYTE:
+ return Vector256UByteHandle;
+ case TYP_SHORT:
+ return Vector256ShortHandle;
+ case TYP_BYTE:
+ return Vector256ByteHandle;
+ case TYP_LONG:
+ return Vector256LongHandle;
+ case TYP_UINT:
+ return Vector256UIntHandle;
+ case TYP_ULONG:
+ return Vector256ULongHandle;
+ default:
+ assert(!"Didn't find a class handle for simdType");
+ }
+ }
+
+ // Reached when no case above returned a handle.
+ return NO_CLASS_HANDLE;
+}
+
GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
+ // Imports an SSE intrinsic. Returns the node built for it, or nullptr when
+ // the intrinsic is not handled here (retNode is left at its initial value).
+ GenTree* retNode = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
 switch (intrinsic)
 {
 case NI_SSE_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSE));
+ // IsSupported is true only when featureSIMD is set and the ISA is supported.
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE));
+ break;
+
+ case NI_SSE_Add:
+ assert(sig->numArgs == 2);
+ // op2 is fetched before op1.
+ op2 = impSIMDPopStack(TYP_SIMD16);
+ op1 = impSIMDPopStack(TYP_SIMD16);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE_Add, TYP_FLOAT, 16);
+ break;
 default:
- return nullptr;
+ JITDUMP("Not implemented hardware intrinsic");
+ break;
 }
+ return retNode;
}
GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
+ // Imports an SSE2 intrinsic. Returns the node built for it, or nullptr when
+ // the intrinsic is not handled here (retNode is left at its initial value).
+ GenTree* retNode = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ var_types baseType = TYP_UNKNOWN;
 switch (intrinsic)
 {
 case NI_SSE2_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSE2));
+ // IsSupported is true only when featureSIMD is set and the ISA is supported.
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE2));
+ break;
+
+ case NI_SSE2_Add:
+ assert(sig->numArgs == 2);
+ // op2 is fetched before op1.
+ op2 = impSIMDPopStack(TYP_SIMD16);
+ op1 = impSIMDPopStack(TYP_SIMD16);
+ // The base type is derived from the class of the signature's return type.
+ baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_Add, baseType, 16);
+ break;
 default:
- return nullptr;
+ JITDUMP("Not implemented hardware intrinsic");
+ break;
 }
+ return retNode;
}
GenTree* Compiler::impSSE3Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
switch (intrinsic)
{
case NI_SSE3_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSE3));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE3));
default:
return nullptr;
switch (intrinsic)
{
case NI_SSSE3_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSSE3));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSSE3));
default:
return nullptr;
switch (intrinsic)
{
case NI_SSE41_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSE41));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE41));
default:
return nullptr;
switch (intrinsic)
{
case NI_SSE42_IsSupported:
- retNode = gtNewIconNode(compSupports(InstructionSet_SSE42));
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE42));
break;
case NI_SSE42_Crc32:
GenTree* Compiler::impAVXIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
+ // Imports an AVX intrinsic. Returns the node built for it, or nullptr when
+ // the intrinsic is not handled here (retNode is left at its initial value).
+ GenTree* retNode = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ var_types baseType = TYP_UNKNOWN;
 switch (intrinsic)
 {
 case NI_AVX_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_AVX));
+ // IsSupported is true only when featureSIMD is set and the ISA is supported.
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_AVX));
+ break;
+
+ case NI_AVX_Add:
+ assert(sig->numArgs == 2);
+ // op2 is fetched before op1.
+ op2 = impSIMDPopStack(TYP_SIMD32);
+ op1 = impSIMDPopStack(TYP_SIMD32);
+ // The base type is derived from the class of the signature's return type.
+ baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX_Add, baseType, 32);
+ break;
 default:
- return nullptr;
+ JITDUMP("Not implemented hardware intrinsic");
+ break;
 }
+ return retNode;
}
GenTree* Compiler::impAVX2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
+ // Imports an AVX2 intrinsic. Returns the node built for it, or nullptr when
+ // the intrinsic is not handled here (retNode is left at its initial value).
+ GenTree* retNode = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ var_types baseType = TYP_UNKNOWN;
 switch (intrinsic)
 {
 case NI_AVX2_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_AVX2));
+ // IsSupported is true only when featureSIMD is set and the ISA is supported.
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_AVX2));
+ break;
+
+ case NI_AVX2_Add:
+ assert(sig->numArgs == 2);
+ // op2 is fetched before op1.
+ op2 = impSIMDPopStack(TYP_SIMD32);
+ op1 = impSIMDPopStack(TYP_SIMD32);
+ // The base type is derived from the class of the signature's return type.
+ baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX2_Add, baseType, 32);
+ break;
 default:
- return nullptr;
+ JITDUMP("Not implemented hardware intrinsic");
+ break;
 }
+ return retNode;
}
GenTree* Compiler::impAESIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
switch (intrinsic)
{
case NI_AES_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_AES));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_AES));
default:
return nullptr;
switch (intrinsic)
{
case NI_FMA_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_FMA));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_FMA));
default:
return nullptr;
switch (intrinsic)
{
case NI_PCLMULQDQ_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_PCLMULQDQ));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_PCLMULQDQ));
default:
return nullptr;
assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
src->gtOper == GT_COMMA || src->gtOper == GT_ADDR ||
- (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+ (src->TypeGet() != TYP_STRUCT &&
+ (GenTree::OperIsSIMD(src->gtOper) || src->OperIsSimdHWIntrinsic() || src->gtOper == GT_LCL_FLD)));
#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
assert(varTypeIsStruct(src));
assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
src->gtOper == GT_COMMA ||
- (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+ (src->TypeGet() != TYP_STRUCT &&
+ (GenTree::OperIsSIMD(src->gtOper) || src->OperIsSimdHWIntrinsic() || src->gtOper == GT_LCL_FLD)));
#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (destAddr->OperGet() == GT_ADDR)
{
assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
break;
#endif // FEATURE_SIMD
+#if FEATURE_HW_INTRINSICS
+ case GT_HWIntrinsic:
+ assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
+ break;
+#endif
case GT_COMMA:
{
}
else
#endif
+#if FEATURE_HW_INTRINSICS
+ if (blockNode->OperGet() == GT_HWIntrinsic && blockNode->AsHWIntrinsic()->isSIMD())
+ {
+ parent->gtOp.gtOp2 = impNormStructVal(blockNode, structHnd, curLevel, forceNormalization);
+ alreadyNormalized = true;
+ }
+ else
+#endif
{
assert(blockNode->OperIsBlk());
{
assert(retNode == nullptr);
const NamedIntrinsic ni = lookupNamedIntrinsic(method);
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
if (ni > NI_HW_INTRINSIC_START && ni < NI_HW_INTRINSIC_END)
{
return impX86HWIntrinsic(ni, method, sig);
}
}
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
if ((namespaceName != nullptr) && strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0)
{
InstructionSet isa = lookupHWIntrinsicISA(className);
addr->ClearContained();
}
}
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD() && !source->OperIsSimdHWIntrinsic())
{
assert(source->IsLocal());
MakeSrcContained(blkNode, source);
needsIndirection = false;
effectiveVal = indirTree->Addr()->gtGetOp1();
}
- if (effectiveVal->OperIsSIMD())
+ if (effectiveVal->OperIsSIMD() || effectiveVal->OperIsSimdHWIntrinsic())
{
needsIndirection = false;
}
return TYP_UNKNOWN;
}
-#if FEATURE_HW_INTRINSICS && DEBUG
- if (isIntrinsicType(typeHnd))
- {
- JITDUMP("\nFound Vector Type: %s with base type %s\n", getClassNameFromMetadata(typeHnd, nullptr),
- getClassNameFromMetadata(getTypeInstantiationArgument(typeHnd, 0), nullptr));
- }
-#endif
-
// fast path search using cached type handles of important types
var_types simdBaseType = TYP_UNKNOWN;
unsigned size = 0;
- // Early return if it is not a SIMD module.
- if (!isSIMDClass(typeHnd))
- {
- return TYP_UNKNOWN;
- }
-
- // The most likely to be used type handles are looked up first followed by
- // less likely to be used type handles
- if (typeHnd == SIMDFloatHandle)
- {
- simdBaseType = TYP_FLOAT;
- JITDUMP(" Known type SIMD Vector<Float>\n");
- }
- else if (typeHnd == SIMDIntHandle)
- {
- simdBaseType = TYP_INT;
- JITDUMP(" Known type SIMD Vector<Int>\n");
- }
- else if (typeHnd == SIMDVector2Handle)
- {
- simdBaseType = TYP_FLOAT;
- size = 2 * genTypeSize(TYP_FLOAT);
- assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
- JITDUMP(" Known type Vector2\n");
- }
- else if (typeHnd == SIMDVector3Handle)
- {
- simdBaseType = TYP_FLOAT;
- size = 3 * genTypeSize(TYP_FLOAT);
- assert(size == info.compCompHnd->getClassSize(typeHnd));
- JITDUMP(" Known type Vector3\n");
- }
- else if (typeHnd == SIMDVector4Handle)
- {
- simdBaseType = TYP_FLOAT;
- size = 4 * genTypeSize(TYP_FLOAT);
- assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
- JITDUMP(" Known type Vector4\n");
- }
- else if (typeHnd == SIMDVectorHandle)
- {
- JITDUMP(" Known type Vector\n");
- }
- else if (typeHnd == SIMDUShortHandle)
- {
- simdBaseType = TYP_CHAR;
- JITDUMP(" Known type SIMD Vector<ushort>\n");
- }
- else if (typeHnd == SIMDUByteHandle)
+ // TODO - Optimize SIMD type recognition by IntrinsicAttribute
+ if (isSIMDClass(typeHnd))
{
- simdBaseType = TYP_UBYTE;
- JITDUMP(" Known type SIMD Vector<ubyte>\n");
- }
- else if (typeHnd == SIMDDoubleHandle)
- {
- simdBaseType = TYP_DOUBLE;
- JITDUMP(" Known type SIMD Vector<Double>\n");
- }
- else if (typeHnd == SIMDLongHandle)
- {
- simdBaseType = TYP_LONG;
- JITDUMP(" Known type SIMD Vector<Long>\n");
- }
- else if (typeHnd == SIMDShortHandle)
- {
- simdBaseType = TYP_SHORT;
- JITDUMP(" Known type SIMD Vector<short>\n");
- }
- else if (typeHnd == SIMDByteHandle)
- {
- simdBaseType = TYP_BYTE;
- JITDUMP(" Known type SIMD Vector<byte>\n");
- }
- else if (typeHnd == SIMDUIntHandle)
- {
- simdBaseType = TYP_UINT;
- JITDUMP(" Known type SIMD Vector<uint>\n");
- }
- else if (typeHnd == SIMDULongHandle)
- {
- simdBaseType = TYP_ULONG;
- JITDUMP(" Known type SIMD Vector<ulong>\n");
- }
-
- // slow path search
- if (simdBaseType == TYP_UNKNOWN)
- {
- // Doesn't match with any of the cached type handles.
- // Obtain base type by parsing fully qualified class name.
- //
- // TODO-Throughput: implement product shipping solution to query base type.
- WCHAR className[256] = {0};
- WCHAR* pbuf = &className[0];
- int len = _countof(className);
- info.compCompHnd->appendClassName(&pbuf, &len, typeHnd, TRUE, FALSE, FALSE);
- noway_assert(pbuf < &className[256]);
- JITDUMP("SIMD Candidate Type %S\n", className);
+ // The most likely to be used type handles are looked up first followed by
+ // less likely to be used type handles
+ if (typeHnd == SIMDFloatHandle)
+ {
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Known type SIMD Vector<Float>\n");
+ }
+ else if (typeHnd == SIMDIntHandle)
+ {
+ simdBaseType = TYP_INT;
+ JITDUMP(" Known type SIMD Vector<Int>\n");
+ }
+ else if (typeHnd == SIMDVector2Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 2 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Known type Vector2\n");
+ }
+ else if (typeHnd == SIMDVector3Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 3 * genTypeSize(TYP_FLOAT);
+ assert(size == info.compCompHnd->getClassSize(typeHnd));
+ JITDUMP(" Known type Vector3\n");
+ }
+ else if (typeHnd == SIMDVector4Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 4 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Known type Vector4\n");
+ }
+ else if (typeHnd == SIMDVectorHandle)
+ {
+ JITDUMP(" Known type Vector\n");
+ }
+ else if (typeHnd == SIMDUShortHandle)
+ {
+ simdBaseType = TYP_CHAR;
+ JITDUMP(" Known type SIMD Vector<ushort>\n");
+ }
+ else if (typeHnd == SIMDUByteHandle)
+ {
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Known type SIMD Vector<ubyte>\n");
+ }
+ else if (typeHnd == SIMDDoubleHandle)
+ {
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Known type SIMD Vector<Double>\n");
+ }
+ else if (typeHnd == SIMDLongHandle)
+ {
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Known type SIMD Vector<Long>\n");
+ }
+ else if (typeHnd == SIMDShortHandle)
+ {
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Known type SIMD Vector<short>\n");
+ }
+ else if (typeHnd == SIMDByteHandle)
+ {
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Known type SIMD Vector<byte>\n");
+ }
+ else if (typeHnd == SIMDUIntHandle)
+ {
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Known type SIMD Vector<uint>\n");
+ }
+ else if (typeHnd == SIMDULongHandle)
+ {
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Known type SIMD Vector<ulong>\n");
+ }
- if (wcsncmp(className, W("System.Numerics."), 16) == 0)
+ // slow path search
+ if (simdBaseType == TYP_UNKNOWN)
{
- if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
+ // Doesn't match with any of the cached type handles.
+ // Obtain base type by parsing fully qualified class name.
+ //
+ // TODO-Throughput: implement product shipping solution to query base type.
+ WCHAR className[256] = {0};
+ WCHAR* pbuf = &className[0];
+ int len = _countof(className);
+ info.compCompHnd->appendClassName(&pbuf, &len, typeHnd, TRUE, FALSE, FALSE);
+ noway_assert(pbuf < &className[256]);
+ JITDUMP("SIMD Candidate Type %S\n", className);
+
+ if (wcsncmp(className, W("System.Numerics."), 16) == 0)
{
- if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
- {
- SIMDFloatHandle = typeHnd;
- simdBaseType = TYP_FLOAT;
- JITDUMP(" Found type SIMD Vector<Float>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.Int32"), 12) == 0)
- {
- SIMDIntHandle = typeHnd;
- simdBaseType = TYP_INT;
- JITDUMP(" Found type SIMD Vector<Int>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.UInt16"), 13) == 0)
+ if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
{
- SIMDUShortHandle = typeHnd;
- simdBaseType = TYP_CHAR;
- JITDUMP(" Found type SIMD Vector<ushort>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.Byte"), 11) == 0)
- {
- SIMDUByteHandle = typeHnd;
- simdBaseType = TYP_UBYTE;
- JITDUMP(" Found type SIMD Vector<ubyte>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.Double"), 13) == 0)
- {
- SIMDDoubleHandle = typeHnd;
- simdBaseType = TYP_DOUBLE;
- JITDUMP(" Found type SIMD Vector<Double>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.Int64"), 12) == 0)
- {
- SIMDLongHandle = typeHnd;
- simdBaseType = TYP_LONG;
- JITDUMP(" Found type SIMD Vector<Long>\n");
+ if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
+ {
+ SIMDFloatHandle = typeHnd;
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Found type SIMD Vector<Float>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int32"), 12) == 0)
+ {
+ SIMDIntHandle = typeHnd;
+ simdBaseType = TYP_INT;
+ JITDUMP(" Found type SIMD Vector<Int>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt16"), 13) == 0)
+ {
+ SIMDUShortHandle = typeHnd;
+ simdBaseType = TYP_CHAR;
+ JITDUMP(" Found type SIMD Vector<ushort>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Byte"), 11) == 0)
+ {
+ SIMDUByteHandle = typeHnd;
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Found type SIMD Vector<ubyte>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Double"), 13) == 0)
+ {
+ SIMDDoubleHandle = typeHnd;
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Found type SIMD Vector<Double>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int64"), 12) == 0)
+ {
+ SIMDLongHandle = typeHnd;
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Found type SIMD Vector<Long>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int16"), 12) == 0)
+ {
+ SIMDShortHandle = typeHnd;
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Found type SIMD Vector<short>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.SByte"), 12) == 0)
+ {
+ SIMDByteHandle = typeHnd;
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Found type SIMD Vector<byte>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt32"), 13) == 0)
+ {
+ SIMDUIntHandle = typeHnd;
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Found type SIMD Vector<uint>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt64"), 13) == 0)
+ {
+ SIMDULongHandle = typeHnd;
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Found type SIMD Vector<ulong>\n");
+ }
+ else
+ {
+ JITDUMP(" Unknown SIMD Vector<T>\n");
+ }
}
- else if (wcsncmp(&(className[25]), W("System.Int16"), 12) == 0)
+ else if (wcsncmp(&(className[16]), W("Vector2"), 8) == 0)
{
- SIMDShortHandle = typeHnd;
- simdBaseType = TYP_SHORT;
- JITDUMP(" Found type SIMD Vector<short>\n");
+ SIMDVector2Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 2 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Found Vector2\n");
}
- else if (wcsncmp(&(className[25]), W("System.SByte"), 12) == 0)
+ else if (wcsncmp(&(className[16]), W("Vector3"), 8) == 0)
{
- SIMDByteHandle = typeHnd;
- simdBaseType = TYP_BYTE;
- JITDUMP(" Found type SIMD Vector<byte>\n");
+ SIMDVector3Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 3 * genTypeSize(TYP_FLOAT);
+ assert(size == info.compCompHnd->getClassSize(typeHnd));
+ JITDUMP(" Found Vector3\n");
}
- else if (wcsncmp(&(className[25]), W("System.UInt32"), 13) == 0)
+ else if (wcsncmp(&(className[16]), W("Vector4"), 8) == 0)
{
- SIMDUIntHandle = typeHnd;
- simdBaseType = TYP_UINT;
- JITDUMP(" Found type SIMD Vector<uint>\n");
+ SIMDVector4Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 4 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Found Vector4\n");
}
- else if (wcsncmp(&(className[25]), W("System.UInt64"), 13) == 0)
+ else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
{
- SIMDULongHandle = typeHnd;
- simdBaseType = TYP_ULONG;
- JITDUMP(" Found type SIMD Vector<ulong>\n");
+ SIMDVectorHandle = typeHnd;
+ JITDUMP(" Found type Vector\n");
}
else
{
- JITDUMP(" Unknown SIMD Vector<T>\n");
+ JITDUMP(" Unknown SIMD Type\n");
}
}
- else if (wcsncmp(&(className[16]), W("Vector2"), 8) == 0)
+ }
+ if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
+ {
+ // If not a fixed size vector then its size is same as SIMD vector
+ // register length in bytes
+ if (size == 0)
{
- SIMDVector2Handle = typeHnd;
-
- simdBaseType = TYP_FLOAT;
- size = 2 * genTypeSize(TYP_FLOAT);
- assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
- JITDUMP(" Found Vector2\n");
+ size = getSIMDVectorRegisterByteLength();
}
- else if (wcsncmp(&(className[16]), W("Vector3"), 8) == 0)
- {
- SIMDVector3Handle = typeHnd;
- simdBaseType = TYP_FLOAT;
- size = 3 * genTypeSize(TYP_FLOAT);
- assert(size == info.compCompHnd->getClassSize(typeHnd));
- JITDUMP(" Found Vector3\n");
- }
- else if (wcsncmp(&(className[16]), W("Vector4"), 8) == 0)
- {
- SIMDVector4Handle = typeHnd;
+ *sizeBytes = size;
+ setUsesSIMDTypes(true);
+ }
+ }
+#if FEATURE_HW_INTRINSICS
+ else if (isIntrinsicType(typeHnd))
+ {
+ if (typeHnd == Vector256FloatHandle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<float>\n");
+ }
+ else if (typeHnd == Vector256DoubleHandle)
+ {
+ simdBaseType = TYP_DOUBLE;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<double>\n");
+ }
+ else if (typeHnd == Vector256IntHandle)
+ {
+ simdBaseType = TYP_INT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<int>\n");
+ }
+ else if (typeHnd == Vector256UIntHandle)
+ {
+ simdBaseType = TYP_UINT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<uint>\n");
+ }
+ else if (typeHnd == Vector256ShortHandle)
+ {
+ simdBaseType = TYP_SHORT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<short>\n");
+ }
+ else if (typeHnd == Vector256UShortHandle)
+ {
+ simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<ushort>\n");
+ }
+ else if (typeHnd == Vector256ByteHandle)
+ {
+ simdBaseType = TYP_BYTE;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<sbyte>\n");
+ }
+ else if (typeHnd == Vector256UByteHandle)
+ {
+ simdBaseType = TYP_UBYTE;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<byte>\n");
+ }
+ else if (typeHnd == Vector256LongHandle)
+ {
+ simdBaseType = TYP_LONG;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<long>\n");
+ }
+ else if (typeHnd == Vector256ULongHandle)
+ {
+ simdBaseType = TYP_ULONG;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<ulong>\n");
+ }
+ else if (typeHnd == Vector128FloatHandle)
+ {
+ // Fast path for the cached Vector128<float> handle. This branch previously re-tested
+ // Vector256FloatHandle (already handled first in this chain), which made it unreachable.
+ simdBaseType = TYP_FLOAT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<float>\n");
+ }
+ else if (typeHnd == Vector128DoubleHandle)
+ {
+ simdBaseType = TYP_DOUBLE;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<double>\n");
+ }
+ else if (typeHnd == Vector128IntHandle)
+ {
+ simdBaseType = TYP_INT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<int>\n");
+ }
+ else if (typeHnd == Vector128UIntHandle)
+ {
+ simdBaseType = TYP_UINT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<uint>\n");
+ }
+ else if (typeHnd == Vector128ShortHandle)
+ {
+ simdBaseType = TYP_SHORT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<short>\n");
+ }
+ else if (typeHnd == Vector128UShortHandle)
+ {
+ simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<ushort>\n");
+ }
+ else if (typeHnd == Vector128ByteHandle)
+ {
+ simdBaseType = TYP_BYTE;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<sbyte>\n");
+ }
+ else if (typeHnd == Vector128UByteHandle)
+ {
+ simdBaseType = TYP_UBYTE;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<byte>\n");
+ }
+ else if (typeHnd == Vector128LongHandle)
+ {
+ simdBaseType = TYP_LONG;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<long>\n");
+ }
+ else if (typeHnd == Vector128ULongHandle)
+ {
+ simdBaseType = TYP_ULONG;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<ulong>\n");
+ }
- simdBaseType = TYP_FLOAT;
- size = 4 * genTypeSize(TYP_FLOAT);
- assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
- JITDUMP(" Found Vector4\n");
- }
- else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
- {
- SIMDVectorHandle = typeHnd;
- JITDUMP(" Found type Vector\n");
- }
- else
+ // slow path search
+ if (simdBaseType == TYP_UNKNOWN)
+ {
+ // Doesn't match with any of the cached type handles.
+ const char* className = getClassNameFromMetadata(typeHnd, nullptr);
+ CORINFO_CLASS_HANDLE baseTypeHnd = getTypeInstantiationArgument(typeHnd, 0);
+
+ if (baseTypeHnd != nullptr)
{
- JITDUMP(" Unknown SIMD Type\n");
+ CorInfoType type = info.compCompHnd->getTypeForPrimitiveNumericClass(baseTypeHnd);
+
+ JITDUMP("HW Intrinsic SIMD Candidate Type %s with Base Type %s\n", className,
+ getClassNameFromMetadata(baseTypeHnd, nullptr));
+
+ if (strcmp(className, "Vector256`1") == 0)
+ {
+ size = YMM_REGSIZE_BYTES;
+ switch (type)
+ {
+ case CORINFO_TYPE_FLOAT:
+ Vector256FloatHandle = typeHnd;
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<float>\n");
+ break;
+ case CORINFO_TYPE_DOUBLE:
+ Vector256DoubleHandle = typeHnd;
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<double>\n");
+ break;
+ case CORINFO_TYPE_INT:
+ Vector256IntHandle = typeHnd;
+ simdBaseType = TYP_INT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<int>\n");
+ break;
+ case CORINFO_TYPE_UINT:
+ Vector256UIntHandle = typeHnd;
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<uint>\n");
+ break;
+ case CORINFO_TYPE_SHORT:
+ Vector256ShortHandle = typeHnd;
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<short>\n");
+ break;
+ case CORINFO_TYPE_USHORT:
+ Vector256UShortHandle = typeHnd;
+ simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<ushort>\n");
+ break;
+ case CORINFO_TYPE_LONG:
+ Vector256LongHandle = typeHnd;
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<long>\n");
+ break;
+ case CORINFO_TYPE_ULONG:
+ Vector256ULongHandle = typeHnd;
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<ulong>\n");
+ break;
+ case CORINFO_TYPE_UBYTE:
+ Vector256UByteHandle = typeHnd;
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<byte>\n");
+ break;
+ case CORINFO_TYPE_BYTE:
+ Vector256ByteHandle = typeHnd;
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<sbyte>\n");
+ break;
+
+ default:
+ JITDUMP(" Unknown Hardware Intrinsic SIMD Type Vector256<T>\n");
+ }
+ }
+ else if (strcmp(className, "Vector128`1") == 0)
+ {
+ size = XMM_REGSIZE_BYTES;
+ switch (type)
+ {
+ case CORINFO_TYPE_FLOAT:
+ Vector128FloatHandle = typeHnd;
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<float>\n");
+ break;
+ case CORINFO_TYPE_DOUBLE:
+ Vector128DoubleHandle = typeHnd;
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<double>\n");
+ break;
+ case CORINFO_TYPE_INT:
+ Vector128IntHandle = typeHnd;
+ simdBaseType = TYP_INT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<int>\n");
+ break;
+ case CORINFO_TYPE_UINT:
+ Vector128UIntHandle = typeHnd;
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<uint>\n");
+ break;
+ case CORINFO_TYPE_SHORT:
+ Vector128ShortHandle = typeHnd;
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<short>\n");
+ break;
+ case CORINFO_TYPE_USHORT:
+ Vector128UShortHandle = typeHnd;
+ simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<ushort>\n");
+ break;
+ case CORINFO_TYPE_LONG:
+ Vector128LongHandle = typeHnd;
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<long>\n");
+ break;
+ case CORINFO_TYPE_ULONG:
+ Vector128ULongHandle = typeHnd;
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<ulong>\n");
+ break;
+ case CORINFO_TYPE_UBYTE:
+ Vector128UByteHandle = typeHnd;
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<byte>\n");
+ break;
+ case CORINFO_TYPE_BYTE:
+ Vector128ByteHandle = typeHnd;
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<sbyte>\n");
+ break;
+
+ default:
+ JITDUMP(" Unknown Hardware Intrinsic SIMD Type Vector128<T>\n");
+ }
+ }
}
}
- }
- if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
- {
- // If not a fixed size vector then its size is same as SIMD vector
- // register length in bytes
- if (size == 0)
+ if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
{
- size = getSIMDVectorRegisterByteLength();
+ *sizeBytes = size;
+ setUsesSIMDTypes(true);
}
-
- *sizeBytes = size;
- setUsesSIMDTypes(true);
}
+#endif // FEATURE_HW_INTRINSICS
return simdBaseType;
}
return result;
}
+/*********************************************************************/
+// Translates a primitive numeric class handle into its CorInfoType, e.g.
+// "System.Int32" ==> CORINFO_TYPE_INT. Only the I1/U1/I2/U2/I4/U4/I8/U8/R4/R8
+// element types are translated; anything else yields CORINFO_TYPE_UNDEF.
+CorInfoType CEEInfo::getTypeForPrimitiveNumericClass(
+ CORINFO_CLASS_HANDLE clsHnd)
+{
+ // NOTE(review): the contract says THROWS/GC_TRIGGERS although only leaf
+ // transitions are used below -- confirm this is intended.
+ CONTRACTL {
+ SO_TOLERANT;
+ THROWS;
+ GC_TRIGGERS;
+ MODE_PREEMPTIVE;
+ } CONTRACTL_END;
+
+ // Returned unchanged when the class is not one of the numeric primitives.
+ CorInfoType result = CORINFO_TYPE_UNDEF;
+
+ JIT_TO_EE_TRANSITION_LEAF();
+
+ TypeHandle th(clsHnd);
+ _ASSERTE (!th.IsGenericVariable());
+
+ CorElementType ty = th.GetSignatureCorElementType();
+ switch (ty)
+ {
+ case ELEMENT_TYPE_I1:
+ case ELEMENT_TYPE_U1:
+ case ELEMENT_TYPE_I2:
+ case ELEMENT_TYPE_U2:
+ case ELEMENT_TYPE_I4:
+ case ELEMENT_TYPE_U4:
+ case ELEMENT_TYPE_I8:
+ case ELEMENT_TYPE_U8:
+ case ELEMENT_TYPE_R4:
+ case ELEMENT_TYPE_R8:
+ result = asCorInfoType(ty);
+ break;
+
+ default:
+ // Not a primitive numeric element type: keep CORINFO_TYPE_UNDEF.
+ break;
+ }
+
+ // Close the leaf transition with the macro that matches the opening one.
+ EE_TO_JIT_TRANSITION_LEAF();
+
+ return result;
+}
+
void CEEInfo::getGSCookie(GSCookie * pCookieVal, GSCookie ** ppCookieVal)
{
CORINFO_CLASS_HANDLE cls
);
+ // Maps a primitive numeric class to its CorInfoType, e.g.
+ // "System.Int32" ==> CORINFO_TYPE_INT, "System.UInt32" ==> CORINFO_TYPE_UINT.
+ CorInfoType getTypeForPrimitiveNumericClass(
+ CORINFO_CLASS_HANDLE cls
+ );
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
BOOL canCast(
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+ class Program
+ {
+ const int Pass = 100;
+ const int Fail = 0;
+
+ // Runs element-wise Add through the AVX, AVX2 and SSE/SSE2 intrinsics (each group only when its
+ // IsSupported check passes) and returns Pass unless at least one result array mismatches.
+ static unsafe int Main(string[] args)
+ {
+     int testResult = Pass;
+
+     if (Avx.IsSupported)
+     {
+         using (TestTable<float> floatTable = new TestTable<float>(new float[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new float[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new float[8]))
+         using (TestTable<double> doubleTable = new TestTable<double>(new double[4] { 1, -5, 100, 0 }, new double[4] { 22, -1, -50, 0 }, new double[4]))
+         {
+             var vf1 = Unsafe.Read<Vector256<float>>(floatTable.inArray1Ptr);
+             var vf2 = Unsafe.Read<Vector256<float>>(floatTable.inArray2Ptr);
+             var vf3 = Avx.Add(vf1, vf2);
+             Unsafe.Write(floatTable.outArrayPtr, vf3);
+
+             var vd1 = Unsafe.Read<Vector256<double>>(doubleTable.inArray1Ptr);
+             var vd2 = Unsafe.Read<Vector256<double>>(doubleTable.inArray2Ptr);
+             var vd3 = Avx.Add(vd1, vd2);
+             Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+             if (!CheckAndReport(floatTable, (x, y, z) => x + y == z, "AVX Add failed on float:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(doubleTable, (x, y, z) => x + y == z, "AVX Add failed on double:"))
+             {
+                 testResult = Fail;
+             }
+         }
+     }
+
+     if (Avx2.IsSupported)
+     {
+         using (TestTable<int> intTable = new TestTable<int>(new int[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new int[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new int[8]))
+         using (TestTable<long> longTable = new TestTable<long>(new long[4] { 1, -5, 100, 0 }, new long[4] { 22, -1, -50, 0 }, new long[4]))
+         using (TestTable<uint> uintTable = new TestTable<uint>(new uint[8] { 1, 5, 100, 0, 1, 5, 100, 0 }, new uint[8] { 22, 1, 50, 0, 22, 1, 50, 0 }, new uint[8]))
+         using (TestTable<ulong> ulongTable = new TestTable<ulong>(new ulong[4] { 1, 5, 100, 0 }, new ulong[4] { 22, 1, 50, 0 }, new ulong[4]))
+         using (TestTable<short> shortTable = new TestTable<short>(new short[16] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new short[16] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new short[16]))
+         using (TestTable<ushort> ushortTable = new TestTable<ushort>(new ushort[16] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new ushort[16] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new ushort[16]))
+         using (TestTable<sbyte> sbyteTable = new TestTable<sbyte>(new sbyte[32] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new sbyte[32] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new sbyte[32]))
+         using (TestTable<byte> byteTable = new TestTable<byte>(new byte[32] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new byte[32] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new byte[32]))
+         {
+             var vi1 = Unsafe.Read<Vector256<int>>(intTable.inArray1Ptr);
+             var vi2 = Unsafe.Read<Vector256<int>>(intTable.inArray2Ptr);
+             var vi3 = Avx2.Add(vi1, vi2);
+             Unsafe.Write(intTable.outArrayPtr, vi3);
+
+             var vl1 = Unsafe.Read<Vector256<long>>(longTable.inArray1Ptr);
+             var vl2 = Unsafe.Read<Vector256<long>>(longTable.inArray2Ptr);
+             var vl3 = Avx2.Add(vl1, vl2);
+             Unsafe.Write(longTable.outArrayPtr, vl3);
+
+             var vui1 = Unsafe.Read<Vector256<uint>>(uintTable.inArray1Ptr);
+             var vui2 = Unsafe.Read<Vector256<uint>>(uintTable.inArray2Ptr);
+             var vui3 = Avx2.Add(vui1, vui2);
+             Unsafe.Write(uintTable.outArrayPtr, vui3);
+
+             var vul1 = Unsafe.Read<Vector256<ulong>>(ulongTable.inArray1Ptr);
+             var vul2 = Unsafe.Read<Vector256<ulong>>(ulongTable.inArray2Ptr);
+             var vul3 = Avx2.Add(vul1, vul2);
+             Unsafe.Write(ulongTable.outArrayPtr, vul3);
+
+             var vs1 = Unsafe.Read<Vector256<short>>(shortTable.inArray1Ptr);
+             var vs2 = Unsafe.Read<Vector256<short>>(shortTable.inArray2Ptr);
+             var vs3 = Avx2.Add(vs1, vs2);
+             Unsafe.Write(shortTable.outArrayPtr, vs3);
+
+             var vus1 = Unsafe.Read<Vector256<ushort>>(ushortTable.inArray1Ptr);
+             var vus2 = Unsafe.Read<Vector256<ushort>>(ushortTable.inArray2Ptr);
+             var vus3 = Avx2.Add(vus1, vus2);
+             Unsafe.Write(ushortTable.outArrayPtr, vus3);
+
+             var vsb1 = Unsafe.Read<Vector256<sbyte>>(sbyteTable.inArray1Ptr);
+             var vsb2 = Unsafe.Read<Vector256<sbyte>>(sbyteTable.inArray2Ptr);
+             var vsb3 = Avx2.Add(vsb1, vsb2);
+             Unsafe.Write(sbyteTable.outArrayPtr, vsb3);
+
+             var vb1 = Unsafe.Read<Vector256<byte>>(byteTable.inArray1Ptr);
+             var vb2 = Unsafe.Read<Vector256<byte>>(byteTable.inArray2Ptr);
+             var vb3 = Avx2.Add(vb1, vb2);
+             Unsafe.Write(byteTable.outArrayPtr, vb3);
+
+             if (!CheckAndReport(intTable, (x, y, z) => x + y == z, "AVX2 Add failed on int:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(longTable, (x, y, z) => x + y == z, "AVX2 Add failed on long:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(uintTable, (x, y, z) => x + y == z, "AVX2 Add failed on uint:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(ulongTable, (x, y, z) => x + y == z, "AVX2 Add failed on ulong:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(shortTable, (x, y, z) => x + y == z, "AVX2 Add failed on short:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(ushortTable, (x, y, z) => x + y == z, "AVX2 Add failed on ushort:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(sbyteTable, (x, y, z) => x + y == z, "AVX2 Add failed on sbyte:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(byteTable, (x, y, z) => x + y == z, "AVX2 Add failed on byte:"))
+             {
+                 testResult = Fail;
+             }
+         }
+     }
+
+     if (Sse2.IsSupported)
+     {
+         using (TestTable<float> floatTable = new TestTable<float>(new float[4] { 1, -5, 100, 0 }, new float[4] { 22, -1, -50, 0 }, new float[4]))
+         using (TestTable<double> doubleTable = new TestTable<double>(new double[2] { 1, -5 }, new double[2] { 22, -1 }, new double[2]))
+         using (TestTable<int> intTable = new TestTable<int>(new int[4] { 1, -5, 100, 0 }, new int[4] { 22, -1, -50, 0 }, new int[4]))
+         using (TestTable<long> longTable = new TestTable<long>(new long[2] { 1, -5 }, new long[2] { 22, -1 }, new long[2]))
+         using (TestTable<uint> uintTable = new TestTable<uint>(new uint[4] { 1, 5, 100, 0 }, new uint[4] { 22, 1, 50, 0 }, new uint[4]))
+         using (TestTable<ulong> ulongTable = new TestTable<ulong>(new ulong[2] { 1, 5 }, new ulong[2] { 22, 1 }, new ulong[2]))
+         using (TestTable<short> shortTable = new TestTable<short>(new short[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new short[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new short[8]))
+         using (TestTable<ushort> ushortTable = new TestTable<ushort>(new ushort[8] { 1, 5, 100, 0, 1, 5, 100, 0 }, new ushort[8] { 22, 1, 50, 0, 22, 1, 50, 0 }, new ushort[8]))
+         using (TestTable<sbyte> sbyteTable = new TestTable<sbyte>(new sbyte[16] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new sbyte[16] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new sbyte[16]))
+         using (TestTable<byte> byteTable = new TestTable<byte>(new byte[16] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new byte[16] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new byte[16]))
+         {
+             var vf1 = Unsafe.Read<Vector128<float>>(floatTable.inArray1Ptr);
+             var vf2 = Unsafe.Read<Vector128<float>>(floatTable.inArray2Ptr);
+             var vf3 = Sse.Add(vf1, vf2);
+             Unsafe.Write(floatTable.outArrayPtr, vf3);
+
+             var vd1 = Unsafe.Read<Vector128<double>>(doubleTable.inArray1Ptr);
+             var vd2 = Unsafe.Read<Vector128<double>>(doubleTable.inArray2Ptr);
+             var vd3 = Sse2.Add(vd1, vd2);
+             Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+             var vi1 = Unsafe.Read<Vector128<int>>(intTable.inArray1Ptr);
+             var vi2 = Unsafe.Read<Vector128<int>>(intTable.inArray2Ptr);
+             var vi3 = Sse2.Add(vi1, vi2);
+             Unsafe.Write(intTable.outArrayPtr, vi3);
+
+             var vl1 = Unsafe.Read<Vector128<long>>(longTable.inArray1Ptr);
+             var vl2 = Unsafe.Read<Vector128<long>>(longTable.inArray2Ptr);
+             var vl3 = Sse2.Add(vl1, vl2);
+             Unsafe.Write(longTable.outArrayPtr, vl3);
+
+             var vui1 = Unsafe.Read<Vector128<uint>>(uintTable.inArray1Ptr);
+             var vui2 = Unsafe.Read<Vector128<uint>>(uintTable.inArray2Ptr);
+             var vui3 = Sse2.Add(vui1, vui2);
+             Unsafe.Write(uintTable.outArrayPtr, vui3);
+
+             var vul1 = Unsafe.Read<Vector128<ulong>>(ulongTable.inArray1Ptr);
+             var vul2 = Unsafe.Read<Vector128<ulong>>(ulongTable.inArray2Ptr);
+             var vul3 = Sse2.Add(vul1, vul2);
+             Unsafe.Write(ulongTable.outArrayPtr, vul3);
+
+             var vs1 = Unsafe.Read<Vector128<short>>(shortTable.inArray1Ptr);
+             var vs2 = Unsafe.Read<Vector128<short>>(shortTable.inArray2Ptr);
+             var vs3 = Sse2.Add(vs1, vs2);
+             Unsafe.Write(shortTable.outArrayPtr, vs3);
+
+             var vus1 = Unsafe.Read<Vector128<ushort>>(ushortTable.inArray1Ptr);
+             var vus2 = Unsafe.Read<Vector128<ushort>>(ushortTable.inArray2Ptr);
+             var vus3 = Sse2.Add(vus1, vus2);
+             Unsafe.Write(ushortTable.outArrayPtr, vus3);
+
+             var vsb1 = Unsafe.Read<Vector128<sbyte>>(sbyteTable.inArray1Ptr);
+             var vsb2 = Unsafe.Read<Vector128<sbyte>>(sbyteTable.inArray2Ptr);
+             var vsb3 = Sse2.Add(vsb1, vsb2);
+             Unsafe.Write(sbyteTable.outArrayPtr, vsb3);
+
+             var vb1 = Unsafe.Read<Vector128<byte>>(byteTable.inArray1Ptr);
+             var vb2 = Unsafe.Read<Vector128<byte>>(byteTable.inArray2Ptr);
+             var vb3 = Sse2.Add(vb1, vb2);
+             Unsafe.Write(byteTable.outArrayPtr, vb3);
+
+             if (!CheckAndReport(intTable, (x, y, z) => x + y == z, "SSE2 Add failed on int:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(longTable, (x, y, z) => x + y == z, "SSE2 Add failed on long:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(uintTable, (x, y, z) => x + y == z, "SSE2 Add failed on uint:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(ulongTable, (x, y, z) => x + y == z, "SSE2 Add failed on ulong:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(shortTable, (x, y, z) => x + y == z, "SSE2 Add failed on short:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(ushortTable, (x, y, z) => x + y == z, "SSE2 Add failed on ushort:"))
+             {
+                 testResult = Fail;
+             }
+
+             // The float case goes through Sse.Add, hence the "SSE" (not "SSE2") message.
+             if (!CheckAndReport(floatTable, (x, y, z) => x + y == z, "SSE Add failed on float:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(doubleTable, (x, y, z) => x + y == z, "SSE2 Add failed on double:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(sbyteTable, (x, y, z) => x + y == z, "SSE2 Add failed on sbyte:"))
+             {
+                 testResult = Fail;
+             }
+
+             if (!CheckAndReport(byteTable, (x, y, z) => x + y == z, "SSE2 Add failed on byte:"))
+             {
+                 testResult = Fail;
+             }
+         }
+     }
+
+     return testResult;
+ }
+
+ // Validates every (input1, input2, output) triple of the table with the given predicate.
+ // On a mismatch, prints failureMessage followed by the whole output array and returns false.
+ static bool CheckAndReport<T>(TestTable<T> table, Func<T, T, T, bool> check, string failureMessage) where T : struct
+ {
+     if (table.CheckResult(check))
+     {
+         return true;
+     }
+
+     Console.WriteLine(failureMessage);
+     foreach (var item in table.outArray)
+     {
+         Console.Write(item + ", ");
+     }
+     Console.WriteLine();
+     return false;
+ }
+
+ /// <summary>
+ /// Bundles two input arrays and one output array and keeps all three pinned (via pinned
+ /// GCHandles) until Dispose is called, exposing raw pointers to their data.
+ /// </summary>
+ public unsafe struct TestTable<T> : IDisposable where T : struct
+ {
+     public T[] inArray1;
+     public T[] inArray2;
+     public T[] outArray;
+
+     // Addresses of the pinned arrays.
+     public void* inArray1Ptr => inHandle1.AddrOfPinnedObject().ToPointer();
+     public void* inArray2Ptr => inHandle2.AddrOfPinnedObject().ToPointer();
+     public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer();
+
+     GCHandle inHandle1;
+     GCHandle inHandle2;
+     GCHandle outHandle;
+
+     /// <summary>Stores the arrays (a, b are inputs, c is the output) and pins each of them.</summary>
+     public TestTable(T[] a, T[] b, T[] c)
+     {
+         this.inArray1 = a;
+         this.inArray2 = b;
+         this.outArray = c;
+
+         inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned);
+         inHandle2 = GCHandle.Alloc(inArray2, GCHandleType.Pinned);
+         outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned);
+     }
+
+     /// <summary>
+     /// Returns true when check(inArray1[i], inArray2[i], outArray[i]) holds for every index of
+     /// inArray1; stops at the first failing index.
+     /// </summary>
+     public bool CheckResult(Func<T, T, T, bool> check)
+     {
+         for (int i = 0; i < inArray1.Length; i++)
+         {
+             if (!check(inArray1[i], inArray2[i], outArray[i]))
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+
+     /// <summary>Releases the pinned handles; safe to call more than once.</summary>
+     public void Dispose()
+     {
+         // GCHandle.Free throws on a handle that is not allocated, so only free live handles.
+         if (inHandle1.IsAllocated)
+         {
+             inHandle1.Free();
+         }
+         if (inHandle2.IsAllocated)
+         {
+             inHandle2.Free();
+         }
+         if (outHandle.IsAllocated)
+         {
+             outHandle.Free();
+         }
+     }
+ }
+
+ }
+}
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize></Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Add.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Add.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
\ No newline at end of file