case INS_pcmpgtd:
case INS_pcmpgtq:
case INS_pcmpgtw:
+ case INS_pdep:
+ case INS_pext:
case INS_phaddd:
case INS_phaddsw:
case INS_phaddw:
case INS_mov_xmm2i:
case INS_mov_i2xmm:
case INS_movnti:
+ case INS_pdep:
+ case INS_pext:
return true;
default:
return false;
switch (sizePrefix)
{
case 0x66:
- vexPrefix |= IsBMIInstruction(ins) ? 0x00 : 0x01;
+ if (IsBMIInstruction(ins))
+ {
+ switch (ins)
+ {
+ case INS_pdep:
+ {
+ vexPrefix |= 0x03;
+ break;
+ }
+
+ case INS_pext:
+ {
+ vexPrefix |= 0x02;
+ break;
+ }
+
+ default:
+ {
+ vexPrefix |= 0x00;
+ break;
+ }
+ }
+ }
+ else
+ {
+ vexPrefix |= 0x01;
+ }
break;
case 0xF3:
vexPrefix |= 0x02;
//
// Notes:
//    Only NI_BMI2_ParallelBitDeposit and NI_BMI2_ParallelBitExtract are handled; both are
//    emitted through genHWIntrinsic_R_R_RM using the instruction looked up for the node's
//    own type. Any other intrinsic id reaches unreached().
//
void CodeGen::genBMI2Intrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement BMI2 intrinsic code generation");
+ NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
+ regNumber targetReg = node->gtRegNum;
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+ var_types baseType = node->gtSIMDBaseType; // NOTE(review): not referenced again in this function
+ var_types targetType = node->TypeGet();
+ instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType); // keyed on the node's type (targetType), not on baseType
+ emitter* emit = getEmitter(); // NOTE(review): not referenced again in this function
+
+ assert(targetReg != REG_NA); // the node is expected to have a register assigned before codegen
+ assert(op1 != nullptr); // every intrinsic handled here has at least one operand
+
+ if (!op1->OperIsList()) // operands are consumed here only when op1 is not an operand list
+ {
+ genConsumeOperands(node);
+ }
+
+ switch (intrinsicId)
+ {
+ case NI_BMI2_ParallelBitDeposit:
+ case NI_BMI2_ParallelBitExtract:
+ {
+ assert(op2 != nullptr); // both intrinsics are binary
+ assert(op1->TypeGet() == op2->TypeGet()); // the two operands must have the same type
+ assert((targetType == TYP_INT) || (targetType == TYP_LONG)); // only int and long results are expected
+ genHWIntrinsic_R_R_RM(node, ins, emitTypeSize(node->TypeGet())); // emit size is derived from the node's type
+ break;
+ }
+
+ default:
+ {
+ unreached(); // no other BMI2 intrinsic id is expected to reach this function
+ break;
+ }
+ }
+
+ genProduceReg(node); // runs after the switch for every handled intrinsic
}
//------------------------------------------------------------------------
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// BMI2 Intrinsics
HARDWARE_INTRINSIC(BMI2_IsSupported, "get_IsSupported", BMI2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(BMI2_ParallelBitDeposit, "ParallelBitDeposit", BMI2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_pdep, INS_pdep, INS_pdep, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) // two-argument scalar entry; INS_pdep fills only the 5th-8th instruction slots (presumably the 32/64-bit integer base types -- confirm against the slot ordering)
+HARDWARE_INTRINSIC(BMI2_ParallelBitExtract, "ParallelBitExtract", BMI2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_pext, INS_pext, INS_pext, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) // two-argument scalar entry; INS_pext fills only the 5th-8th instruction slots (presumably the 32/64-bit integer base types -- confirm against the slot ordering)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags
{
// These ISAs have no implementation
case InstructionSet_AES:
- case InstructionSet_BMI2:
case InstructionSet_PCLMULQDQ:
{
return false;
case InstructionSet_AVX:
case InstructionSet_AVX2:
case InstructionSet_BMI1:
+ case InstructionSet_BMI2:
case InstructionSet_SSE42:
{
return true;
CORINFO_SIG_INFO* sig,
bool mustExpand)
{
- return nullptr;
+ var_types callType = JITtype2varType(sig->retType);
+
+ switch (intrinsic)
+ {
+ case NI_BMI2_ParallelBitDeposit:
+ case NI_BMI2_ParallelBitExtract:
+ {
+ assert(sig->numArgs == 2);
+
+ GenTree* op2 = impPopStack().val;
+ GenTree* op1 = impPopStack().val;
+
+ return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
+ }
+
+ default:
+ {
+ unreached();
+ return nullptr;
+ }
+ }
}
GenTree* Compiler::impFMAIntrinsic(NamedIntrinsic intrinsic,
INST3(blsmsk, "blsmsk", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF3)) // Get Mask Up to Lowest Set Bit
INST3(blsr, "blsr", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF3)) // Reset Lowest Set Bit
+// BMI2 (pdep and pext are both declared with SSE38(0xF5); the VEX prefix selection code ORs in 0x03 for pdep and 0x02 for pext)
+INST3(pdep, "pdep", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF5)) // Parallel Bits Deposit
+INST3(pext, "pext", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0xF5)) // Parallel Bits Extract
INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)