#if defined(_TARGET_XARCH_)
void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival);
-void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
+void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival);
void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins);
void genHWIntrinsic_R_R_R_RM(
GenTree* op1,
GenTree* op2,
NamedIntrinsic hwIntrinsicID);
+ GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type,
+ GenTree* op1,
+ GenTree* op2,
+ GenTree* op3,
+ NamedIntrinsic hwIntrinsicID);
GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd);
CORINFO_CLASS_HANDLE gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType);
#endif // FEATURE_HW_INTRINSICS
return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
}
+// IsBMIInstruction: returns true when 'ins' lies inside the BMI instruction
+// group, i.e. in [INS_FIRST_BMI_INSTRUCTION, INS_LAST_BMI_INSTRUCTION] as
+// declared by the instruction table (see the instrsxarch.h hunk below).
+bool IsBMIInstruction(instruction ins)
+{
+    return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
+}
+
+// getBmiRegNumber: returns the opcode-extension value that the single-operand
+// BMI instructions encode in the ModRM reg field (the "reg345" slot used by
+// insEncodeReg345). For BMI instructions that instead place a real register in
+// that field (e.g. INS_andn — see the callers' fallback to id->idReg1() when
+// REG_NA is returned), returns REG_NA.
+//
+// Arguments:
+//    ins - the instruction being encoded; must be a BMI instruction
+//
+// Return Value:
+//    The ModRM.reg opcode extension cast to regNumber, or REG_NA.
+regNumber getBmiRegNumber(instruction ins)
+{
+    switch (ins)
+    {
+        case INS_blsi:
+        {
+            // /3 opcode extension — presumably per the Intel SDM encoding; TODO confirm
+            return (regNumber)3;
+        }
+
+        case INS_blsmsk:
+        {
+            // /2 opcode extension
+            return (regNumber)2;
+        }
+
+        case INS_blsr:
+        {
+            // /1 opcode extension
+            return (regNumber)1;
+        }
+
+        default:
+        {
+            // Remaining BMI instructions (e.g. INS_andn) carry an actual
+            // register in reg345, so no extension value applies here.
+            assert(IsBMIInstruction(ins));
+            return REG_NA;
+        }
+    }
+}
+
regNumber getSseShiftRegNumber(instruction ins)
{
switch (ins)
case INS_addss:
case INS_addsubpd:
case INS_addsubps:
+ case INS_andn:
case INS_andnpd:
case INS_andnps:
case INS_andpd:
case INS_andps:
case INS_blendpd:
case INS_blendps:
+ case INS_blsi:
+ case INS_blsmsk:
+ case INS_blsr:
case INS_cmppd:
case INS_cmpps:
case INS_cmpsd:
{
switch (ins)
{
+ case INS_andn:
+ case INS_blsi:
+ case INS_blsmsk:
+ case INS_blsr:
case INS_cvttsd2si:
case INS_cvttss2si:
case INS_cvtsd2si:
// 4-byte opcode: with the bytes ordered as 0x22114433
// check for a prefix in the 11 position
BYTE sizePrefix = (code >> 16) & 0xFF;
- if (sizePrefix != 0 && isPrefix(sizePrefix))
+ if ((sizePrefix != 0) && isPrefix(sizePrefix))
{
// 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
//
switch (sizePrefix)
{
case 0x66:
- vexPrefix |= 0x01;
+ vexPrefix |= IsBMIInstruction(ins) ? 0x00 : 0x01;
break;
case 0xF3:
vexPrefix |= 0x02;
attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
}
- instrDesc* id = emitNewInstrDsp(attr, offs);
- UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
+ instrDesc* id = emitNewInstrDsp(attr, offs);
id->idIns(ins);
id->idInsFmt(IF_RWR_RRD_MRD);
id->idReg2(reg2);
id->idAddr()->iiaFieldHnd = fldHnd;
+ UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
id->idCodeSize(sz);
dispIns(id);
}
}
- unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ regNumber reg345 = REG_NA;
+ if (IsBMIInstruction(ins))
+ {
+ reg345 = getBmiRegNumber(ins);
+ }
+ if (reg345 == REG_NA)
+ {
+ reg345 = id->idReg1();
+ }
+ unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
+
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
if (UseVEXEncoding() && (ins != INS_crc32))
}
}
- unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ regNumber reg345 = REG_NA;
+ if (IsBMIInstruction(ins))
+ {
+ reg345 = getBmiRegNumber(ins);
+ }
+ if (reg345 == REG_NA)
+ {
+ reg345 = id->idReg1();
+ }
+ else
+ {
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+ unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
+
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
if (UseVEXEncoding() && (ins != INS_crc32))
}
}
- unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ regNumber reg345 = REG_NA;
+ if (IsBMIInstruction(ins))
+ {
+ reg345 = getBmiRegNumber(ins);
+ }
+ if (reg345 == REG_NA)
+ {
+ reg345 = id->idReg1();
+ }
+ else
+ {
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+ unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
+
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
if (UseVEXEncoding() && (ins != INS_crc32))
}
}
- unsigned regCode = insEncodeReg345(ins, reg1, size, &code);
+ regNumber reg345 = REG_NA;
+ if (IsBMIInstruction(ins))
+ {
+ reg345 = getBmiRegNumber(ins);
+ }
+ if (reg345 == REG_NA)
+ {
+ reg345 = id->idReg1();
+ }
+ unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
regCode |= insEncodeReg012(ins, reg2, size, &code);
if (TakesVexPrefix(ins))
return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, TYP_UNKNOWN, 0);
}
+// gtNewScalarHWIntrinsicNode: creates a scalar (non-SIMD-typed) hardware
+// intrinsic node with three operands. The operands are packed into an
+// argument list because GenTreeHWIntrinsic's binary form only carries two.
+//
+// Arguments:
+//    type           - the type of the produced value
+//    op1, op2, op3  - the three operands of the intrinsic
+//    hwIntrinsicID  - the intrinsic being created
+//
+// Return Value:
+//    The new GenTreeHWIntrinsic node (baseType TYP_UNKNOWN, simdSize 0,
+//    matching the other scalar-node factories in this file).
+GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type,
+                                                         GenTree*       op1,
+                                                         GenTree*       op2,
+                                                         GenTree*       op3,
+                                                         NamedIntrinsic hwIntrinsicID)
+{
+    // Flag any local-var operands as used by a SIMD/HW intrinsic so later
+    // phases treat them accordingly.
+    SetOpLclRelatedToSIMDIntrinsic(op1);
+    SetOpLclRelatedToSIMDIntrinsic(op2);
+    SetOpLclRelatedToSIMDIntrinsic(op3);
+
+    return new (this, GT_HWIntrinsic)
+        GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, TYP_UNKNOWN, 0);
+}
+
//---------------------------------------------------------------------------------------
// gtNewMustThrowException:
// create a throw node (calling into JIT helper) that must be thrown.
}
else
{
- genHWIntrinsic_R_R_RM(node, ins);
+ genHWIntrinsic_R_R_RM(node, ins, EA_ATTR(node->gtSIMDSize));
}
break;
}
// node - The hardware intrinsic node
// ins - The instruction being generated
//
-void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
+void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr)
{
var_types targetType = node->TypeGet();
regNumber targetReg = node->gtRegNum;
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
- emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
emitter* emit = getEmitter();
// TODO-XArch-CQ: Commutative operations can have op1 be contained
}
else if (op2->OperIsHWIntrinsic())
{
- emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);
+ emit->emitIns_SIMD_R_R_AR(ins, attr, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);
return;
}
else if (op2->isIndir())
case GT_CLS_VAR_ADDR:
{
- emit->emitIns_SIMD_R_R_C(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0);
+ emit->emitIns_SIMD_R_R_C(ins, attr, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0);
return;
}
default:
{
- emit->emitIns_SIMD_R_R_A(ins, simdSize, targetReg, op1Reg, memIndir);
+ emit->emitIns_SIMD_R_R_A(ins, attr, targetReg, op1Reg, memIndir);
return;
}
}
assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
assert(offset != (unsigned)-1);
- emit->emitIns_SIMD_R_R_S(ins, simdSize, targetReg, op1Reg, varNum, offset);
+ emit->emitIns_SIMD_R_R_S(ins, attr, targetReg, op1Reg, varNum, offset);
}
else
{
op1Reg = targetReg;
}
- emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg);
+ emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, op1Reg, op2Reg);
}
}
assert(op1 != nullptr);
assert(op2 != nullptr);
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- genHWIntrinsic_R_R_RM(node, ins);
+ genHWIntrinsic_R_R_RM(node, ins, EA_ATTR(node->gtSIMDSize));
break;
}
switch (intrinsicId)
{
+ case NI_BMI1_AndNot:
+ {
+ assert(op2 != nullptr);
+ assert(op1->TypeGet() == op2->TypeGet());
+ assert((targetType == TYP_INT) || (targetType == TYP_LONG));
+ genHWIntrinsic_R_R_RM(node, ins, emitTypeSize(node->TypeGet()));
+ break;
+ }
+
+ case NI_BMI1_ExtractLowestSetBit:
+ case NI_BMI1_GetMaskUpToLowestSetBit:
+ case NI_BMI1_ResetLowestSetBit:
case NI_BMI1_TrailingZeroCount:
{
assert(op2 == nullptr);
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// BMI1 Intrinsics
HARDWARE_INTRINSIC(BMI1_IsSupported, "get_IsSupported", BMI1, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(BMI1_AndNot, "AndNot", BMI1, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_andn, INS_andn, INS_andn, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_ExtractLowestSetBit, "ExtractLowestSetBit", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_blsi, INS_blsi, INS_blsi, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_GetMaskUpToLowestSetBit, "GetMaskUpToLowestSetBit", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_blsmsk, INS_blsmsk, INS_blsmsk, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_ResetLowestSetBit, "ResetLowestSetBit", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_blsr, INS_blsr, INS_blsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(BMI1_TrailingZeroCount, "TrailingZeroCount", BMI1, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
switch (intrinsic)
{
+ case NI_BMI1_AndNot:
+ {
+ assert(sig->numArgs == 2);
+
+ GenTree* op2 = impPopStack().val;
+ GenTree* op1 = impPopStack().val;
+
+ return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
+ }
+
+ case NI_BMI1_ExtractLowestSetBit:
+ case NI_BMI1_GetMaskUpToLowestSetBit:
+ case NI_BMI1_ResetLowestSetBit:
case NI_BMI1_TrailingZeroCount:
{
assert(sig->numArgs == 1);
INST3(vfnmsub231ss, "fmnsub231ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xBF)) //
INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+// BMI1
+INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+INST3(andn, "andn", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF2)) // Logical AND NOT
+INST3(blsi, "blsi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF3)) // Extract Lowest Set Isolated Bit
+INST3(blsmsk, "blsmsk", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF3)) // Get Mask Up to Lowest Set Bit
+INST3(blsr, "blsr", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF3)) // Reset Lowest Set Bit
+
+INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+
INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
// Scalar instructions in SSE4.2