// Return Value:
// returns true if an intrinsic of this category (given its flags, where supplied) can be table-driven in CodeGen
//
-static bool genIsTableDrivenHWIntrinsic(HWIntrinsicCategory category)
+static bool genIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
{
// TODO - move more categories to the table-driven framework
- const bool tableDrivenIntrinsic = category == HW_Category_SimpleSIMD;
- const bool nonTableDrivenIntrinsic = category == HW_Category_Special;
- return tableDrivenIntrinsic && !nonTableDrivenIntrinsic;
+ // HW_Category_Helper and HW_Flag_MultiIns usually need manual codegen, so only the categories listed below without the MultiIns flag are table-driven
+ const bool tableDrivenCategory =
+ category == HW_Category_SimpleSIMD || category == HW_Category_MemoryLoad || category == HW_Category_SIMDScalar;
+ const bool tableDrivenFlag = (flags & HW_Flag_MultiIns) == 0;
+ return tableDrivenCategory && tableDrivenFlag;
}
void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
InstructionSet isa = Compiler::isaOfHWIntrinsic(intrinsicID);
HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID);
- HWIntrinsicFlag flag = Compiler::flagOfHWIntrinsic(intrinsicID);
+ HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID);
+ int ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID);
- assert((flag & HW_Flag_NoCodeGen) == 0);
+ assert((flags & HW_Flag_NoCodeGen) == 0);
- if (genIsTableDrivenHWIntrinsic(category))
+ if (genIsTableDrivenHWIntrinsic(category, flags))
{
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
case 1:
genConsumeOperands(node);
op1Reg = op1->gtRegNum;
- emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
+ if (category == HW_Category_MemoryLoad)
+ {
+ emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
+ }
+ else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
+ {
+ emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
+ }
+ else
+ {
+
+ emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
+ }
break;
+
case 2:
- genHWIntrinsic_R_R_RM(node, ins);
+ genConsumeOperands(node);
+ if (ival != -1)
+ {
+ genHWIntrinsic_R_R_RM_I(node, ins);
+ }
+ else if (category == HW_Category_MemoryLoad)
+ {
+ emit->emitIns_SIMD_R_R_AR(ins, emitTypeSize(TYP_SIMD16), targetReg, op1->gtRegNum, op2->gtRegNum);
+ }
+ else
+ {
+ genHWIntrinsic_R_R_RM(node, ins);
+ }
break;
case 3:
{
case GT_CLS_VAR_ADDR:
{
- emit->emitIns_SIMD_R_R_C_I(ins, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0, ival,
- targetType);
+ emit->emitIns_SIMD_R_R_C_I(ins, emitTypeSize(targetType), targetReg, op1Reg,
+ memBase->gtClsVar.gtClsVarHnd, 0, ival);
return;
}
default:
{
- emit->emitIns_SIMD_R_R_A_I(ins, targetReg, op1Reg, memIndir, ival, targetType);
+ emit->emitIns_SIMD_R_R_A_I(ins, emitTypeSize(targetType), targetReg, op1Reg, memIndir, ival);
return;
}
}
assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
assert(offset != (unsigned)-1);
- emit->emitIns_SIMD_R_R_S_I(ins, targetReg, op1Reg, varNum, offset, ival, targetType);
+ emit->emitIns_SIMD_R_R_S_I(ins, emitTypeSize(targetType), targetReg, op1Reg, varNum, offset, ival);
}
else
{
- emit->emitIns_SIMD_R_R_R_I(ins, targetReg, op1Reg, op2->gtRegNum, ival, targetType);
+ emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(targetType), targetReg, op1Reg, op2->gtRegNum, ival);
}
}
regNumber targetReg = node->gtRegNum;
var_types targetType = node->TypeGet();
var_types baseType = node->gtSIMDBaseType;
- instruction ins = INS_invalid;
+ instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
switch (intrinsicID)
{
- case NI_SSE_Add:
- case NI_SSE_AddScalar:
- case NI_SSE_And:
- case NI_SSE_AndNot:
case NI_SSE_ConvertToVector128SingleScalar:
- case NI_SSE_Divide:
- case NI_SSE_DivideScalar:
- case NI_SSE_Max:
- case NI_SSE_MaxScalar:
- case NI_SSE_Min:
- case NI_SSE_MinScalar:
- case NI_SSE_MoveHighToLow:
- case NI_SSE_MoveLowToHigh:
- case NI_SSE_MoveScalar:
- case NI_SSE_Multiply:
- case NI_SSE_MultiplyScalar:
- case NI_SSE_Or:
- case NI_SSE_Subtract:
- case NI_SSE_SubtractScalar:
- case NI_SSE_UnpackHigh:
- case NI_SSE_UnpackLow:
- case NI_SSE_Xor:
{
assert(node->TypeGet() == TYP_SIMD16);
assert(node->gtSIMDBaseType == TYP_FLOAT);
genHWIntrinsic_R_R_RM(node, ins);
break;
}
-
- case NI_SSE_CompareEqual:
- case NI_SSE_CompareEqualScalar:
- case NI_SSE_CompareGreaterThan:
- case NI_SSE_CompareGreaterThanScalar:
- case NI_SSE_CompareGreaterThanOrEqual:
- case NI_SSE_CompareGreaterThanOrEqualScalar:
- case NI_SSE_CompareLessThan:
- case NI_SSE_CompareLessThanScalar:
- case NI_SSE_CompareLessThanOrEqual:
- case NI_SSE_CompareLessThanOrEqualScalar:
- case NI_SSE_CompareNotEqual:
- case NI_SSE_CompareNotEqualScalar:
- case NI_SSE_CompareNotGreaterThan:
- case NI_SSE_CompareNotGreaterThanScalar:
- case NI_SSE_CompareNotGreaterThanOrEqual:
- case NI_SSE_CompareNotGreaterThanOrEqualScalar:
- case NI_SSE_CompareNotLessThan:
- case NI_SSE_CompareNotLessThanScalar:
- case NI_SSE_CompareNotLessThanOrEqual:
- case NI_SSE_CompareNotLessThanOrEqualScalar:
- case NI_SSE_CompareOrdered:
- case NI_SSE_CompareOrderedScalar:
- case NI_SSE_CompareUnordered:
- case NI_SSE_CompareUnorderedScalar:
- {
- assert(node->TypeGet() == TYP_SIMD16);
- assert(node->gtSIMDBaseType == TYP_FLOAT);
- assert(Compiler::ivalOfHWIntrinsic(intrinsicID) != -1);
-
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- genHWIntrinsic_R_R_RM_I(node, ins);
- break;
- }
-
case NI_SSE_CompareEqualOrderedScalar:
case NI_SSE_CompareEqualUnorderedScalar:
{
assert(baseType == TYP_FLOAT);
- op2Reg = op2->gtRegNum;
-
- regNumber tmpReg = node->GetSingleTempReg();
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
+ op2Reg = op2->gtRegNum;
+ regNumber tmpReg = node->GetSingleTempReg();
- emit->emitIns_SIMD_R_R(ins, op1Reg, op2Reg, TYP_SIMD16);
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R(ins, op1Reg, op2Reg, TYP_SIMD16);
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R(ins, op1Reg, op2Reg, TYP_SIMD16);
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R(ins, op2Reg, op1Reg, TYP_SIMD16);
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R(ins, op2Reg, op1Reg, TYP_SIMD16);
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
assert(baseType == TYP_FLOAT);
op2Reg = op2->gtRegNum;
- regNumber tmpReg = node->GetSingleTempReg();
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
+ regNumber tmpReg = node->GetSingleTempReg();
- emit->emitIns_SIMD_R_R(ins, op1Reg, op2Reg, TYP_SIMD16);
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
break;
}
- case NI_SSE_ConvertToInt32:
- case NI_SSE_ConvertToInt32WithTruncation:
- case NI_SSE_ConvertToInt64:
- case NI_SSE_ConvertToInt64WithTruncation:
- case NI_SSE_Reciprocal:
- case NI_SSE_ReciprocalSqrt:
- case NI_SSE_Sqrt:
- {
- assert(baseType == TYP_FLOAT);
- assert(op2 == nullptr);
-
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R(ins, targetReg, op1Reg, TYP_SIMD16);
- break;
- }
-
case NI_SSE_ConvertToSingle:
case NI_SSE_StaticCast:
{
assert(op2 == nullptr);
if (op1Reg != targetReg)
{
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R(ins, targetReg, op1Reg, TYP_SIMD16);
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
}
break;
}
- case NI_SSE_LoadAlignedVector128:
- case NI_SSE_LoadScalar:
- case NI_SSE_LoadVector128:
- {
- assert(baseType == TYP_FLOAT);
- assert(op2 == nullptr);
-
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_R_AR(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, 0);
- break;
- }
-
- case NI_SSE_LoadHigh:
- case NI_SSE_LoadLow:
- {
- assert(baseType == TYP_FLOAT);
- op2Reg = op2->gtRegNum;
-
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R_AR(ins, targetReg, op1Reg, op2Reg, TYP_SIMD16);
- break;
- }
-
case NI_SSE_MoveMask:
{
assert(baseType == TYP_FLOAT);
assert(op2 == nullptr);
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R(ins, targetReg, op1Reg, TYP_INT);
- break;
- }
-
- case NI_SSE_ReciprocalScalar:
- case NI_SSE_ReciprocalSqrtScalar:
- case NI_SSE_SqrtScalar:
- {
- assert(baseType == TYP_FLOAT);
- assert(op2 == nullptr);
-
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
- emit->emitIns_SIMD_R_R_R(ins, targetReg, op1Reg, op1Reg, TYP_SIMD16);
- break;
- }
-
- case NI_SSE_SetAllVector128:
- {
- assert(baseType == TYP_FLOAT);
- assert(op2 == nullptr);
- emit->emitIns_SIMD_R_R_R_I(INS_shufps, targetReg, op1Reg, op1Reg, 0, TYP_SIMD16);
+ emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
break;
}
if (op1Reg == targetReg)
{
regNumber tmpReg = node->GetSingleTempReg();
- emit->emitIns_SIMD_R_R(INS_movaps, tmpReg, op1Reg, TYP_SIMD16);
+ emit->emitIns_R_R(INS_movaps, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
op1Reg = tmpReg;
}
- emit->emitIns_SIMD_R_R_R(INS_xorps, targetReg, targetReg, targetReg, TYP_SIMD16);
- emit->emitIns_SIMD_R_R_R(INS_movss, targetReg, targetReg, op1Reg, TYP_SIMD16);
+ emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
+ emit->emitIns_SIMD_R_R_R(INS_movss, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
break;
}
assert(baseType == TYP_FLOAT);
assert(op1 == nullptr);
assert(op2 == nullptr);
- emit->emitIns_SIMD_R_R_R(INS_xorps, targetReg, targetReg, targetReg, TYP_SIMD16);
+ emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
break;
}
if (op3->IsCnsIntOrI())
{
ssize_t ival = op3->AsIntConCommon()->IconValue();
- emit->emitIns_SIMD_R_R_R_I(INS_shufps, targetReg, op1Reg, op2Reg, (int)ival, TYP_SIMD16);
+ emit->emitIns_SIMD_R_R_R_I(INS_shufps, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, (int)ival);
}
else
{
for (unsigned i = 0; i < jmpCount; i++)
{
genDefineTempLabel(jmpTable[i]);
- emit->emitIns_SIMD_R_R_R_I(INS_shufps, targetReg, op1Reg, op2Reg, i, TYP_SIMD16);
+ emit->emitIns_SIMD_R_R_R_I(INS_shufps, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, i);
emit->emitIns_J(INS_jmp, switchTableEnd);
}
1) Each hardware intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic`
2) All the overloads of an intrinsic in an ISA class share one Intrinsic ID
3) An intrinsic that generates instructions with a fixed imm8 operand has an `ival` field whose value is not -1, e.g., Sse.CompareEqual(v1,v2) -> cmpps xmm0, xmm1, 0
- 4) SIMD intrinsics have a non-zero `SIMD size` field based-on that operate over `Vector128<T>` (16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_) or `Vector256<T>`
+ 4) SIMD intrinsics have a non-zero `SIMD size` field, based on whether they operate over `Vector128<T>` (16) or `Vector256<T>` (32)
5) Scalar intrinsics that operate over general purpose registers (e.g., Sse41.Crc32) have a `SIMD size` of 0
6) Each intrinsic has a `NumArg` field for its number of parameters; intrinsics that are overloaded with different numbers of parameters have this field set to -1
7) Each intrinsic has 10 `instructions` fields that list the instructions that should be generated, based on the base type
// SSE Intrinsics
HARDWARE_INTRINSIC(SSE_IsSupported, "get_IsSupported", SSE, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_Add, "Add", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_AddScalar, "AddScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_AddScalar, "AddScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE_And, "And", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_AndNot, "AndNot", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Fixed)
-HARDWARE_INTRINSIC(SSE_CompareEqual, "CompareEqual", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareEqualScalar, "CompareEqualScalar", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_AndNot, "AndNot", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareEqual, "CompareEqual", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareEqualScalar, "CompareEqualScalar", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE_CompareLessThan, "CompareLessThan", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanScalar, "CompareLessThanScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanScalar, "CompareLessThanScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotEqual, "CompareNotEqual", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar, "CompareNotEqualScalar", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareNotEqual, "CompareNotEqual", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar, "CompareNotEqualScalar", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan, "CompareNotGreaterThan", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqualScalar, "CompareNotGreaterThanOrEqualScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqualScalar, "CompareNotGreaterThanOrEqualScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareNotLessThan, "CompareNotLessThan", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotLessThanScalar, "CompareNotLessThanScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareNotLessThanScalar, "CompareNotLessThanScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqual, "CompareNotLessThanOrEqual", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqualScalar, "CompareNotLessThanOrEqualScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareNotLessThanOrEqualScalar, "CompareNotLessThanOrEqualScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareOrdered, "CompareOrdered", SSE, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareOrderedScalar, "CompareOrderedScalar", SSE, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareOrderedScalar, "CompareOrderedScalar", SSE, 7, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareUnordered, "CompareUnordered", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar, "CompareUnorderedScalar", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32, "ConvertToInt32", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64, "ConvertToInt64", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToSingle, "ConvertToSingle", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToVector128SingleScalar, "ConvertToVector128SingleScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation, "ConvertToInt32WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation, "ConvertToInt64WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Divide, "Divide", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_DivideScalar, "DivideScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadAlignedVector128, "LoadAlignedVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadHigh, "LoadHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadLow, "LoadLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadScalar, "LoadScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_LoadVector128, "LoadVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Max, "Max", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_MaxScalar, "MaxScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Min, "Min", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_MinScalar, "MinScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_MoveHighToLow, "MoveHighToLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_MoveLowToHigh, "MoveLowToHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_MoveMask, "MoveMask", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_MoveScalar, "MoveScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Multiply, "Multiply", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_MultiplyScalar, "MultiplyScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_CompareUnorderedScalar, "CompareUnorderedScalar", SSE, 3, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32, "ConvertToInt32", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64, "ConvertToInt64", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_ConvertToSingle, "ConvertToSingle", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_Helper, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertToVector128SingleScalar, "ConvertToVector128SingleScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ConvertToInt32WithTruncation, "ConvertToInt32WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_ConvertToInt64WithTruncation, "ConvertToInt64WithTruncation", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_Divide, "Divide", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_DivideScalar, "DivideScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_LoadAlignedVector128, "LoadAlignedVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadHigh, "LoadHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadLow, "LoadLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadScalar, "LoadScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_LoadVector128, "LoadVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Max, "Max", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_MaxScalar, "MaxScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_Min, "Min", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_MinScalar, "MinScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_MoveHighToLow, "MoveHighToLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_MoveLowToHigh, "MoveLowToHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_MoveMask, "MoveMask", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_MoveScalar, "MoveScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(SSE_Multiply, "Multiply", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE_MultiplyScalar, "MultiplyScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE_Or, "Or", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_Reciprocal, "Reciprocal", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ReciprocalScalar, "ReciprocalScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ReciprocalScalar, "ReciprocalScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE_ReciprocalSqrt, "ReciprocalSqrt", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_ReciprocalSqrtScalar, "ReciprocalSqrtScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SetAllVector128, "SetAllVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SetScalar, "SetScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SetVector128, "SetVector128", SSE, -1, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SetZeroVector128, "SetZeroVector128", SSE, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Shuffle, "Shuffle", SSE, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_ReciprocalSqrtScalar, "ReciprocalSqrtScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_SetAllVector128, "SetAllVector128", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(SSE_SetScalar, "SetScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_Helper, HW_Flag_MultiIns)
+HARDWARE_INTRINSIC(SSE_SetVector128, "SetVector128", SSE, -1, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(SSE_SetZeroVector128, "SetZeroVector128", SSE, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_Helper, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Shuffle, "Shuffle", SSE, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_IMM, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_Sqrt, "Sqrt", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SqrtScalar, "SqrtScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_StaticCast, "StaticCast", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-//HARDWARE_INTRINSIC(SSE_Store, "Store", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-//HARDWARE_INTRINSIC(SSE_StoreAligned, "StoreAligned", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-//HARDWARE_INTRINSIC(SSE_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-//HARDWARE_INTRINSIC(SSE_StoreHigh, "StoreHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-//HARDWARE_INTRINSIC(SSE_StoreLow, "StoreLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-//HARDWARE_INTRINSIC(SSE_StoreScalar, "StoreScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_SqrtScalar, "SqrtScalar", SSE, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
+HARDWARE_INTRINSIC(SSE_StaticCast, "StaticCast", SSE, -1, 16, 1, {INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps, INS_movaps}, HW_Category_Helper, HW_Flag_TwoTypeGeneric)
HARDWARE_INTRINSIC(SSE_Subtract, "Subtract", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_SubtractScalar, "SubtractScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_SubtractScalar, "SubtractScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_UnpackHigh, "UnpackHigh", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_UnpackLow, "UnpackLow", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_Xor, "Xor", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_Xor, "Xor", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
// SSE2 Intrinsics
HARDWARE_INTRINSIC(SSE2_IsSupported, "get_IsSupported", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
int numArgs;
instruction ins[10];
HWIntrinsicCategory category;
- HWIntrinsicFlag flag;
+ HWIntrinsicFlag flags;
};
static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
static unsigned simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig)
{
assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
- assert((hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].flag & HW_Flag_UnfixedSIMDSize) == 0);
+ assert((hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].flags & HW_Flag_UnfixedSIMDSize) == 0);
return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].simdSize;
}
}
//------------------------------------------------------------------------
-// HWIntrinsicFlag: get the flag of the given intrinsic
+// flagsOfHWIntrinsic: get the flags of the given intrinsic
//
// Arguments:
// intrinsic -- id of the intrinsic function.
//
// Return Value:
-// the flag of the given intrinsic
+// the flags of the given intrinsic
//
-HWIntrinsicFlag Compiler::flagOfHWIntrinsic(NamedIntrinsic intrinsic)
+HWIntrinsicFlag Compiler::flagsOfHWIntrinsic(NamedIntrinsic intrinsic)
{
assert(intrinsic != NI_Illegal);
assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
- return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].flag;
+ return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].flags;
}
//------------------------------------------------------------------------
assert(varTypeIsArithmetic(argType));
arg = impPopStack().val;
assert(varTypeIsArithmetic(arg->TypeGet()));
- assert(genTypeSize(argType) <= genTypeSize(arg->TypeGet()));
+ assert(genActualType(arg->gtType) == genActualType(argType));
}
return arg;
}
// Return Value:
// returns true if this category can be table-driven in the importer
//
-static bool impIsTableDrivenHWIntrinsic(HWIntrinsicCategory category)
+static bool impIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
{
- // TODO - make more categories to the table-driven framework
- const bool tableDrivenIntrinsic = category == HW_Category_SimpleSIMD;
- const bool nonTableDrivenIntrinsic = category == HW_Category_Special;
- return tableDrivenIntrinsic && !nonTableDrivenIntrinsic;
+ // HW_Flag_NoCodeGen implies this intrinsic should be manually morphed in the importer.
+ return category != HW_Category_Special && category != HW_Category_Scalar && (flags & HW_Flag_NoCodeGen) == 0;
}
//------------------------------------------------------------------------
{
InstructionSet isa = isaOfHWIntrinsic(intrinsic);
HWIntrinsicCategory category = categoryOfHWIntrinsic(intrinsic);
+ HWIntrinsicFlag flags = flagsOfHWIntrinsic(intrinsic);
int numArgs = sig->numArgs;
- var_types callType = JITtype2varType(sig->retType);
+ var_types retType = JITtype2varType(sig->retType);
+ var_types baseType = TYP_UNKNOWN;
+ if (retType == TYP_STRUCT && featureSIMD)
+ {
+ unsigned int sizeBytes;
+ baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
+ retType = getSIMDTypeForSize(sizeBytes);
+ assert(sizeBytes != 0 && baseType != TYP_UNKNOWN);
+ }
// This intrinsic is supported if
// - the ISA is available on the underlying hardware (compSupports returns true)
// - the compiler supports this hardware intrinsics (compSupportsHWIntrinsic returns true)
// - intrinsics do not require 64-bit registers (r64) on 32-bit platforms (isTypeSupportedForIntrinsic returns
// true)
- bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa) && isTypeSupportedForIntrinsic(callType);
+ bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa) && isTypeSupportedForIntrinsic(retType);
if (category == HW_Category_IsSupportedProperty)
{
{
return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
}
+ else if (category == HW_Category_IMM)
+ {
+ GenTree* lastOp = impStackTop().val;
+ if (!lastOp->IsCnsIntOrI() && !mustExpand)
+ {
+ // When the imm-argument is not a constant and we are not being forced to expand, we need to
+ // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
+ // intrinsic method is recursive and will be forced to expand, at which point
+ // we emit some less efficient fallback code.
+ return nullptr;
+ }
+ }
+
+ if ((flags & HW_Flag_Generic) != 0)
+ {
+ assert(baseType != TYP_UNKNOWN);
+ // When the type argument is not a numeric type (and we are not being forced to expand), we need to
+ // return nullptr so that a GT_CALL to the intrinsic method is emitted; that call throws NotSupportedException.
+ if (!varTypeIsArithmetic(baseType))
+ {
+ assert(!mustExpand);
+ return nullptr;
+ }
+
+ if ((flags & HW_Flag_TwoTypeGeneric) != 0)
+ {
+ // StaticCast<T, U> has two type parameters.
+ assert(!mustExpand);
+ assert(numArgs == 1);
+ var_types srcType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
+ assert(srcType != TYP_UNKNOWN);
+ if (!varTypeIsArithmetic(srcType))
+ {
+ return nullptr;
+ }
+ }
+ }
// table-driven importer of simple intrinsics
- if (impIsTableDrivenHWIntrinsic(category))
+ if (impIsTableDrivenHWIntrinsic(category, flags))
{
- unsigned int sizeBytes;
- var_types baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
- assert(baseType != TYP_UNKNOWN && sizeBytes != 0);
- var_types retType = getSIMDTypeForSize(sizeBytes);
+ if (!varTypeIsSIMD(retType))
+ {
+ baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
+ assert(baseType != TYP_UNKNOWN);
+ }
+
unsigned simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
CORINFO_ARG_LIST_HANDLE argList = sig->args;
CORINFO_CLASS_HANDLE argClass;
var_types argType = TYP_UNKNOWN;
- assert(numArgs > 0);
- assert(retType != TYP_UNDEF);
- assert(retType == TYP_SIMD16 || retType == TYP_SIMD32);
+ assert(numArgs >= 0);
assert(insOfHWIntrinsic(intrinsic, baseType) != INS_invalid);
assert(simdSize == 32 || simdSize == 16);
switch (numArgs)
{
+ case 0:
+ retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
+ break;
case 1:
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);
-
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
break;
case 2:
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);
- op1 = gtNewArgList(op1, op2, op3);
- retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
+ retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, baseType, simdSize);
break;
}
default:
CORINFO_SIG_INFO* sig,
bool mustExpand)
{
- GenTree* retNode = nullptr;
- GenTree* op1 = nullptr;
- GenTree* op2 = nullptr;
- GenTree* op3 = nullptr;
- GenTree* op4 = nullptr;
+ GenTree* retNode = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ GenTree* op3 = nullptr;
+ GenTree* op4 = nullptr;
+ int simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
+ assert(simdSize == 16);
switch (intrinsic)
{
op2 = impPopStack().val;
op1 = impPopStack().val;
- GenTree* left = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE_UnpackLow, TYP_FLOAT, 16);
- GenTree* right = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_UnpackLow, TYP_FLOAT, 16);
+ GenTree* left = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE_UnpackLow, TYP_FLOAT, simdSize);
+ GenTree* right = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_UnpackLow, TYP_FLOAT, simdSize);
GenTree* control = gtNewIconNode(68, TYP_UBYTE);
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, left, right, control, NI_SSE_Shuffle, TYP_FLOAT, 16);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, left, right, control, NI_SSE_Shuffle, TYP_FLOAT, simdSize);
break;
}
- case NI_SSE_Shuffle:
- {
- assert(sig->numArgs == 3);
- assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
-
- op3 = impStackTop().val;
-
- if (op3->IsCnsIntOrI() || mustExpand)
- {
- impPopStack(); // Pop the value we peeked at
- op2 = impSIMDPopStack(TYP_SIMD16);
- op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, intrinsic, TYP_FLOAT, 16);
- }
- else
- {
- // When op3 is not a constant and we are not being forced to expand, we need to
- // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
- // intrinsic method is recursive and will be forced to expand, at which point
- // we emit some less efficient fallback code.
-
- return nullptr;
- }
- break;
- }
-
- case NI_SSE_Add:
- case NI_SSE_AddScalar:
- case NI_SSE_And:
- case NI_SSE_AndNot:
- case NI_SSE_CompareEqual:
- case NI_SSE_CompareEqualScalar:
- case NI_SSE_CompareGreaterThan:
- case NI_SSE_CompareGreaterThanScalar:
- case NI_SSE_CompareGreaterThanOrEqual:
- case NI_SSE_CompareGreaterThanOrEqualScalar:
- case NI_SSE_CompareLessThan:
- case NI_SSE_CompareLessThanScalar:
- case NI_SSE_CompareLessThanOrEqual:
- case NI_SSE_CompareLessThanOrEqualScalar:
- case NI_SSE_CompareNotEqual:
- case NI_SSE_CompareNotEqualScalar:
- case NI_SSE_CompareNotGreaterThan:
- case NI_SSE_CompareNotGreaterThanScalar:
- case NI_SSE_CompareNotGreaterThanOrEqual:
- case NI_SSE_CompareNotGreaterThanOrEqualScalar:
- case NI_SSE_CompareNotLessThan:
- case NI_SSE_CompareNotLessThanScalar:
- case NI_SSE_CompareNotLessThanOrEqual:
- case NI_SSE_CompareNotLessThanOrEqualScalar:
- case NI_SSE_CompareOrdered:
- case NI_SSE_CompareOrderedScalar:
- case NI_SSE_CompareUnordered:
- case NI_SSE_CompareUnorderedScalar:
- case NI_SSE_Divide:
- case NI_SSE_DivideScalar:
- case NI_SSE_Max:
- case NI_SSE_MaxScalar:
- case NI_SSE_Min:
- case NI_SSE_MinScalar:
- case NI_SSE_MoveHighToLow:
- case NI_SSE_MoveLowToHigh:
- case NI_SSE_MoveScalar:
- case NI_SSE_Multiply:
- case NI_SSE_MultiplyScalar:
- case NI_SSE_Or:
- case NI_SSE_Subtract:
- case NI_SSE_SubtractScalar:
- case NI_SSE_UnpackHigh:
- case NI_SSE_UnpackLow:
- case NI_SSE_Xor:
- assert(sig->numArgs == 2);
- assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
- op2 = impSIMDPopStack(TYP_SIMD16);
- op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
- break;
-
- case NI_SSE_CompareEqualOrderedScalar:
- case NI_SSE_CompareEqualUnorderedScalar:
- case NI_SSE_CompareGreaterThanOrderedScalar:
- case NI_SSE_CompareGreaterThanUnorderedScalar:
- case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
- case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
- case NI_SSE_CompareLessThanOrderedScalar:
- case NI_SSE_CompareLessThanUnorderedScalar:
- case NI_SSE_CompareLessThanOrEqualOrderedScalar:
- case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
- case NI_SSE_CompareNotEqualOrderedScalar:
- case NI_SSE_CompareNotEqualUnorderedScalar:
- assert(sig->numArgs == 2);
- assert(JITtype2varType(sig->retType) == TYP_BOOL);
- assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
- op2 = impSIMDPopStack(TYP_SIMD16);
- op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(TYP_BOOL, op1, op2, intrinsic, TYP_FLOAT, 16);
- break;
-
case NI_SSE_ConvertToVector128SingleScalar:
{
assert(sig->numArgs == 2);
op2 = impPopStack().val;
op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
- break;
- }
-
- case NI_SSE_LoadHigh:
- case NI_SSE_LoadLow:
- {
- assert(sig->numArgs == 2);
- assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
- op2 = impPopStack().val;
- op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, simdSize);
break;
}
assert(JITtype2varType(sig->retType) == TYP_INT);
assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, 16);
- break;
-
- case NI_SSE_StaticCast:
- {
- assert(sig->numArgs == 1);
- var_types tgtType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
- var_types srcType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
-
- if (varTypeIsArithmetic(tgtType) && varTypeIsArithmetic(srcType))
- {
- op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, tgtType, 16);
- }
- else
- {
- return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
- }
+ retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, simdSize);
break;
- }
- case NI_SSE_LoadAlignedVector128:
- case NI_SSE_LoadScalar:
- case NI_SSE_LoadVector128:
case NI_SSE_SetAllVector128:
- case NI_SSE_SetScalar:
assert(sig->numArgs == 1);
assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
op1 = impPopStack().val;
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16);
- break;
-
- case NI_SSE_Reciprocal:
- case NI_SSE_ReciprocalScalar:
- case NI_SSE_ReciprocalSqrt:
- case NI_SSE_ReciprocalSqrtScalar:
- case NI_SSE_Sqrt:
- case NI_SSE_SqrtScalar:
- assert(sig->numArgs == 1);
- assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
- op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16);
- break;
-
- case NI_SSE_ConvertToInt32:
- case NI_SSE_ConvertToInt32WithTruncation:
- case NI_SSE_ConvertToInt64:
- case NI_SSE_ConvertToInt64WithTruncation:
- case NI_SSE_ConvertToSingle:
- {
- assert(sig->numArgs == 1);
- assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
- var_types callType = JITtype2varType(sig->retType);
-
-#ifdef _TARGET_X86_
- if (varTypeIsLong(callType))
- {
- assert(intrinsic == NI_SSE_ConvertToInt64 || intrinsic == NI_SSE_ConvertToInt64WithTruncation);
- return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
- }
-#endif // _TARGET_X86_
-
- op1 = impSIMDPopStack(TYP_SIMD16);
- retNode = gtNewSimdHWIntrinsicNode(callType, op1, intrinsic, TYP_FLOAT, 16);
- break;
- }
-
- case NI_SSE_SetZeroVector128:
- assert(sig->numArgs == 0);
- assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
- retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, intrinsic, TYP_FLOAT, 16);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtCloneExpr(op1), gtNewIconNode(0), NI_SSE_Shuffle,
+ TYP_FLOAT, simdSize);
break;
default: