case NI_BMI1_ExtractLowestSetBit:
case NI_BMI1_GetMaskUpToLowestSetBit:
case NI_BMI1_ResetLowestSetBit:
+ {
+ assert(op2 == nullptr); // these intrinsics take a single operand
+ assert((targetType == TYP_INT) || (targetType == TYP_LONG));
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet())); // no dependency-breaking XOR, unlike TrailingZeroCount
+ break;
+ }
+
case NI_BMI1_TrailingZeroCount:
{
assert(op2 == nullptr);
assert((targetType == TYP_INT) || (targetType == TYP_LONG));
+ // tzcnt has a false dependency on its target register on Intel Sandy Bridge and Haswell processors, so emit
+ // `XOR target, target` first (triggers renaming). NOTE(review): zeroes the input if it shares targetReg - confirm.
+ regNumber targetReg = node->gtRegNum;
+ getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()));
break;
}
assert(node->gtHWIntrinsicId == NI_LZCNT_LeadingZeroCount);
genConsumeOperands(node);
+ // lzcnt has a false dependency on its target register on Intel Sandy Bridge and Haswell processors, so emit
+ // `XOR target, target` first (triggers renaming). NOTE(review): zeroes the input if it shares targetReg - confirm.
+ regNumber targetReg = node->gtRegNum;
+ getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
genHWIntrinsic_R_RM(node, INS_lzcnt, emitTypeSize(node->TypeGet()));
genProduceReg(node);
}
assert(node->gtHWIntrinsicId == NI_POPCNT_PopCount);
genConsumeOperands(node);
+ // popcnt has a false dependency on its target register on Intel Sandy Bridge, Haswell, and Skylake processors, so
+ // emit `XOR target, target` first (triggers renaming). NOTE(review): zeroes the input if it shares targetReg.
+ regNumber targetReg = node->gtRegNum;
+ getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
genHWIntrinsic_R_RM(node, INS_popcnt, emitTypeSize(node->TypeGet()));
genProduceReg(node);
}
HARDWARE_INTRINSIC(BMI1_ExtractLowestSetBit, "ExtractLowestSetBit", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_blsi, INS_blsi, INS_blsi, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1_GetMaskUpToLowestSetBit, "GetMaskUpToLowestSetBit", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_blsmsk, INS_blsmsk, INS_blsmsk, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1_ResetLowestSetBit, "ResetLowestSetBit", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_blsr, INS_blsr, INS_blsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_TrailingZeroCount, "TrailingZeroCount", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_TrailingZeroCount, "TrailingZeroCount", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// LZCNT Intrinsics
HARDWARE_INTRINSIC(LZCNT_IsSupported, "get_IsSupported", LZCNT, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount, "LeadingZeroCount", LZCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount, "LeadingZeroCount", LZCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// POPCNT Intrinsics
HARDWARE_INTRINSIC(POPCNT_IsSupported, "get_IsSupported", POPCNT, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(POPCNT_PopCount, "PopCount", POPCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(POPCNT_PopCount, "PopCount", POPCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
#endif // FEATURE_HW_INTRINSIC
#undef HARDWARE_INTRINSIC