#include "gcinfoencoder.h"
//------------------------------------------------------------------------
+// assertIsContainableHWIntrinsicOp: Asserts that op is containable by node
+//
+// Arguments:
+// lowering - The lowering phase from the compiler
+// node - The HWIntrinsic node that has the contained node
+// op - The op that is contained
+//
+static void assertIsContainableHWIntrinsicOp(Lowering* lowering, GenTreeHWIntrinsic* node, GenTree* op)
+{
+#if DEBUG
+ // The Lowering::IsContainableHWIntrinsicOp call is not quite right, since it follows pre-register allocation
+ // logic. However, this check is still important due to the various containment rules that SIMD intrinsics follow.
+ //
+ // We use isContainable to track the special HWIntrinsic node containment rules (for things like LoadAligned and
+ // LoadUnaligned) and we use the supportsRegOptional check to support general-purpose loads (both from stack
+ // spillage
+ // and for isUsedFromMemory contained nodes, in the case where the register allocator decided to not allocate a
+ // register
+ // in the first place).
+
+ bool supportsRegOptional = false;
+ bool isContainable = lowering->IsContainableHWIntrinsicOp(node, op, &supportsRegOptional);
+ assert(isContainable || supportsRegOptional);
+#endif // DEBUG
+}
+
+//------------------------------------------------------------------------
// genIsTableDrivenHWIntrinsic:
//
// Arguments:
var_types targetType = node->TypeGet();
regNumber targetReg = node->gtRegNum;
GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
emitter* emit = getEmitter();
+ if (op2 != nullptr)
+ {
+ // The Compare*OrderedScalar and Compare*UnorderedScalar intrinsics come down this
+ // code path. They are all MultiIns, as the return value comes from the flags and
+ // we have two operands instead.
+
+ assert(HWIntrinsicInfo::GeneratesMultipleIns(node->gtHWIntrinsicId));
+ assert(targetReg != REG_NA);
+
+ targetReg = op1->gtRegNum;
+ op1 = op2;
+ op2 = nullptr;
+ }
+ else
+ {
+ assert(!node->OperIsCommutative());
+ }
+
assert(targetReg != REG_NA);
- assert(node->gtGetOp2() == nullptr);
- assert(!node->OperIsCommutative());
+ assert(op2 == nullptr);
if (op1->isContained() || op1->isUsedFromSpillTemp())
{
assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId));
-
-#if DEBUG
- bool supportsRegOptional = false;
- bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional);
- assert(isContainable || (supportsRegOptional && op1->IsRegOptional()));
-#endif // DEBUG
+ assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op1);
TempDsc* tmpDsc = nullptr;
unsigned varNum = BAD_VAR_NUM;
if (op1->isContained() || op1->isUsedFromSpillTemp())
{
assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId));
-
-#if DEBUG
- bool supportsRegOptional = false;
- bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional);
- assert(isContainable || (supportsRegOptional && op1->IsRegOptional()));
-#endif // DEBUG
+ assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op1);
TempDsc* tmpDsc = nullptr;
unsigned varNum = BAD_VAR_NUM;
if (op2->isContained() || op2->isUsedFromSpillTemp())
{
assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId));
-
-#if DEBUG
- bool supportsRegOptional = false;
- bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional);
- assert(isContainable || (supportsRegOptional && op2->IsRegOptional()));
-#endif // DEBUG
+ assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2);
TempDsc* tmpDsc = nullptr;
unsigned varNum = BAD_VAR_NUM;
if (op2->isContained() || op2->isUsedFromSpillTemp())
{
assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId));
-
-#if DEBUG
- bool supportsRegOptional = false;
- bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional);
- assert(isContainable || (supportsRegOptional && op2->IsRegOptional()));
-#endif // DEBUG
+ assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2);
TempDsc* tmpDsc = nullptr;
unsigned varNum = BAD_VAR_NUM;
if (op2->isContained() || op2->isUsedFromSpillTemp())
{
+ assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId));
+ assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2);
+
TempDsc* tmpDsc = nullptr;
unsigned varNum = BAD_VAR_NUM;
unsigned offset = (unsigned)-1;
case NI_SSE_CompareEqualUnorderedScalar:
{
assert(baseType == TYP_FLOAT);
- op2Reg = op2->gtRegNum;
regNumber tmpReg = node->GetSingleTempReg();
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType);
// Ensure we aren't overwriting targetReg
assert(tmpReg != targetReg);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
case NI_SSE_CompareGreaterThanUnorderedScalar:
{
assert(baseType == TYP_FLOAT);
- op2Reg = op2->gtRegNum;
-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
+
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
{
assert(baseType == TYP_FLOAT);
- op2Reg = op2->gtRegNum;
-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
+
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
case NI_SSE_CompareLessThanUnorderedScalar:
{
assert(baseType == TYP_FLOAT);
- op2Reg = op2->gtRegNum;
-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
+
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
{
assert(baseType == TYP_FLOAT);
- op2Reg = op2->gtRegNum;
-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
+
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
case NI_SSE_CompareNotEqualUnorderedScalar:
{
assert(baseType == TYP_FLOAT);
- op2Reg = op2->gtRegNum;
regNumber tmpReg = node->GetSingleTempReg();
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType);
// Ensure we aren't overwriting targetReg
assert(tmpReg != targetReg);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
case NI_SSE2_CompareEqualUnorderedScalar:
{
assert(baseType == TYP_DOUBLE);
- op2Reg = op2->gtRegNum;
regNumber tmpReg = node->GetSingleTempReg();
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
// Ensure we aren't overwriting targetReg
assert(tmpReg != targetReg);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
case NI_SSE2_CompareGreaterThanUnorderedScalar:
{
assert(baseType == TYP_DOUBLE);
- op2Reg = op2->gtRegNum;
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
{
assert(baseType == TYP_DOUBLE);
- op2Reg = op2->gtRegNum;
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
case NI_SSE2_CompareLessThanUnorderedScalar:
{
assert(baseType == TYP_DOUBLE);
- op2Reg = op2->gtRegNum;
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
{
assert(baseType == TYP_DOUBLE);
- op2Reg = op2->gtRegNum;
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
case NI_SSE2_CompareNotEqualUnorderedScalar:
{
assert(baseType == TYP_DOUBLE);
- op2Reg = op2->gtRegNum;
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
regNumber tmpReg = node->GetSingleTempReg();
// Ensure we aren't overwriting targetReg
assert(tmpReg != targetReg);
- emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
regNumber tmpReg = node->GetSingleTempReg();
assert(HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType) == INS_ptest);
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg);
- emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg);
emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
+ emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
}
case NI_SSE41_TestZ:
{
assert(HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType) == INS_ptest);
- emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
- emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
+ genHWIntrinsic_R_RM(node, INS_ptest, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
+ emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
}
case NI_SSE41_TestC:
{
assert(HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType) == INS_ptest);
- emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
- emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
+ genHWIntrinsic_R_RM(node, INS_ptest, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
+ emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
}
case NI_SSE41_TestNotZAndNotC:
{
assert(HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType) == INS_ptest);
- emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
- emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
+ genHWIntrinsic_R_RM(node, INS_ptest, emitTypeSize(TYP_SIMD16));
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
+ emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
}
void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
+ regNumber targetReg = node->gtRegNum;
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
- regNumber targetReg = node->gtRegNum;
- assert(targetReg != REG_NA);
- var_types targetType = node->TypeGet();
- var_types baseType = node->gtSIMDBaseType;
+ var_types baseType = node->gtSIMDBaseType;
+ var_types targetType = node->TypeGet();
+ emitter* emit = getEmitter();
regNumber op1Reg = op1->gtRegNum;
- regNumber op2Reg = op2->gtRegNum;
genConsumeOperands(node);
+ assert(targetReg != REG_NA);
+ assert(op1Reg != REG_NA);
+ assert(op2 != nullptr);
+ assert(!node->OperIsCommutative());
+
switch (intrinsicId)
{
case NI_SSE42_Crc32:
+ {
if (op1Reg != targetReg)
{
- assert(op2Reg != targetReg);
- inst_RV_RV(INS_mov, targetReg, op1Reg, targetType, emitTypeSize(targetType));
+ assert(op2->gtRegNum != targetReg);
+ emit->emitIns_R_R(INS_mov, emitTypeSize(targetType), targetReg, op1Reg);
}
- if (baseType == TYP_UBYTE || baseType == TYP_USHORT) // baseType is the type of the second argument
+ // This makes the genHWIntrinsic_R_RM code much simpler, as we don't need an
+ // overload that explicitly takes the operands.
+ node->gtOp1 = op2;
+ node->gtOp2 = nullptr;
+
+ if ((baseType == TYP_UBYTE) || (baseType == TYP_USHORT)) // baseType is the type of the second argument
{
assert(targetType == TYP_INT);
- inst_RV_RV(INS_crc32, targetReg, op2Reg, baseType, emitTypeSize(baseType));
+ genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(baseType));
}
else
{
assert(op1->TypeGet() == op2->TypeGet());
- assert(targetType == TYP_INT || targetType == TYP_LONG);
- inst_RV_RV(INS_crc32, targetReg, op2Reg, targetType, emitTypeSize(targetType));
+ assert((targetType == TYP_INT) || (targetType == TYP_LONG));
+ genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(targetType));
}
break;
+ }
+
default:
+ {
unreached();
break;
+ }
}
+
genProduceReg(node);
}
case NI_AVX_TestC:
{
- emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
- emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
+ genHWIntrinsic_R_RM(node, ins, attr);
emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
+ emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
}
case NI_AVX_TestNotZAndNotC:
{
- emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
- emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
+ genHWIntrinsic_R_RM(node, ins, attr);
emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
+ emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
}
case NI_AVX_TestZ:
{
- emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
- emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
+ genHWIntrinsic_R_RM(node, ins, attr);
emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
+ emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
break;
}
HARDWARE_INTRINSIC(SSE_And, "And", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE_AndNot, "AndNot", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareEqual, "CompareEqual", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareEqualScalar, "CompareEqualScalar", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareLessThan, "CompareLessThan", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareLessThanScalar, "CompareLessThanScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareNotEqual, "CompareNotEqual", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar, "CompareNotEqualScalar", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan, "CompareNotGreaterThan", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_AndNot, "AndNot", SSE2, -1, 16, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_Average, "Average", SSE2, -1, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2_CompareEqual, "CompareEqual", SSE2, 0, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareEqualScalar, "CompareEqualScalar", SSE2, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareGreaterThan, "CompareGreaterThan", SSE2, 6, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE2, 6, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE2, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE2, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareLessThan, "CompareLessThan", SSE2, 1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_Special, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareLessThanScalar, "CompareLessThanScalar", SSE2, 1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareNotEqual, "CompareNotEqual", SSE2, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareNotEqualScalar, "CompareNotEqualScalar", SSE2, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThan, "CompareNotGreaterThan", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE2, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE3_HorizontalSubtract, "HorizontalSubtract", SSE3, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE3_LoadAndDuplicateToVector128, "LoadAndDuplicateToVector128", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE3_LoadDquVector128, "LoadDquVector128", SSE3, -1, 16, 1, {INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE3_MoveAndDuplicate, "MoveAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE3_MoveHighAndDuplicate, "MoveHighAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate, "MoveLowAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3_MoveAndDuplicate, "MoveAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3_MoveHighAndDuplicate, "MoveHighAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate, "MoveLowAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
HARDWARE_INTRINSIC(SSE41_Ceiling, "Ceiling", SSE41, 10, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_CeilingScalar, "CeilingScalar", SSE41, 10, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41_CompareEqual, "CompareEqual", SSE41, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16, "ConvertToVector128Int16", SSE41, -1, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32, "ConvertToVector128Int32", SSE41, -1, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64, "ConvertToVector128Int64", SSE41, -1, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16, "ConvertToVector128Int16", SSE41, -1, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32, "ConvertToVector128Int32", SSE41, -1, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64, "ConvertToVector128Int64", SSE41, -1, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_DotProduct, "DotProduct", SSE41, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(SSE41_Extract, "Extract", SSE41, -1, 16, 2, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_pextrq, INS_pextrq, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_Floor, "Floor", SSE41, 9, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_RoundToZero, "RoundToZero", SSE41, 11, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE41_RoundToZeroScalar, "RoundToZeroScalar", SSE41, 11, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE41_TestAllOnes, "TestAllOnes", SSE41, -1, 16, 1, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE41_TestAllZeros, "TestAllZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE41_TestC, "TestC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros, "TestMixOnesZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC, "TestNotZAndNotC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(SSE41_TestZ, "TestZ", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestAllZeros, "TestAllZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestC, "TestC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros, "TestMixOnesZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC, "TestNotZAndNotC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(SSE41_TestZ, "TestZ", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
HARDWARE_INTRINSIC(AVX_StoreAligned, "StoreAligned", AVX, -1, 32, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", AVX, -1, 32, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_Subtract, "Subtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AVX_TestC, "TestC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX_TestNotZAndNotC, "TestNotZAndNotC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(AVX_TestZ, "TestZ", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX_TestC, "TestC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX_TestNotZAndNotC, "TestNotZAndNotC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX_TestZ, "TestZ", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX_UnpackHigh, "UnpackHigh", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_UnpackLow, "UnpackLow", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_Xor, "Xor", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX2_Permute2x128, "Permute2x128", AVX2, -1, 32, 3, {INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_OneTypeGeneric|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical, "ShiftLeftLogical", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical128BitLane, "ShiftLeftLogical128BitLane", AVX2, -1, 32, 2, {INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ShiftLeftLogicalVariable, "ShiftLeftLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_ShiftLeftLogicalVariable, "ShiftLeftLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize)
HARDWARE_INTRINSIC(AVX2_ShiftRightArithmetic, "ShiftRightArithmetic", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2_ShiftRightLogical, "ShiftRightLogical", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX2_ShiftRightLogical128BitLane, "ShiftRightLogical128BitLane", AVX2, -1, 32, 2, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2_ShiftRightLogicalVariable, "ShiftRightLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_ShiftRightLogicalVariable, "ShiftRightLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize)
HARDWARE_INTRINSIC(AVX2_Subtract, "Subtract", AVX2, -1, 32, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2_SubtractSaturate, "SubtractSaturate", AVX2, -1, 32, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2_UnpackHigh, "UnpackHigh", AVX2, -1, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)