INTRINS_PEXT_I64,
INTRINS_PDEP_I32,
INTRINS_PDEP_I64,
+ INTRINS_BZHI_I32,
+ INTRINS_BZHI_I64,
#if defined(TARGET_AMD64) || defined(TARGET_X86)
INTRINS_SSE_PMOVMSKB,
INTRINS_SSE_PSRLI_W,
values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_CTTZ32 ? INTRINS_CTTZ_I32 : INTRINS_CTTZ_I64), args, 2, "");
break;
}
+ case OP_BZHI32: // BZHI opcodes: emitted as a call to the matching BZHI LLVM intrinsic
+ case OP_BZHI64: {
+ LLVMValueRef args [2];
+ args [0] = lhs; // lhs and rhs are forwarded unchanged as the two call arguments
+ args [1] = rhs;
+ values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_BZHI32 ? INTRINS_BZHI_I32 : INTRINS_BZHI_I64), args, 2, ""); // the opcode selects the 32- or 64-bit intrinsic; the call result becomes the value of ins->dreg
+ break;
+ }
case OP_PEXT32:
case OP_PEXT64: {
LLVMValueRef args [2];
{INTRINS_CTLZ_I64, "llvm.ctlz.i64"},
{INTRINS_CTTZ_I32, "llvm.cttz.i32"},
{INTRINS_CTTZ_I64, "llvm.cttz.i64"},
+ {INTRINS_BZHI_I32, "llvm.x86.bmi.bzhi.32"},
+ {INTRINS_BZHI_I64, "llvm.x86.bmi.bzhi.64"},
{INTRINS_PEXT_I32, "llvm.x86.bmi.pext.32"},
{INTRINS_PEXT_I64, "llvm.x86.bmi.pext.64"},
{INTRINS_PDEP_I32, "llvm.x86.bmi.pdep.32"},
case INTRINS_CTTZ_I64:
AddFunc2 (module, name, LLVMInt64Type (), LLVMInt64Type (), LLVMInt1Type ());
break;
+ case INTRINS_BZHI_I32:
case INTRINS_PEXT_I32:
- AddFunc2 (module, name, LLVMInt32Type (), LLVMInt32Type (), LLVMInt32Type ());
- break;
- case INTRINS_PEXT_I64:
- AddFunc2 (module, name, LLVMInt64Type (), LLVMInt64Type (), LLVMInt64Type ());
- break;
case INTRINS_PDEP_I32:
AddFunc2 (module, name, LLVMInt32Type (), LLVMInt32Type (), LLVMInt32Type ());
break;
+ case INTRINS_BZHI_I64:
+ case INTRINS_PEXT_I64:
case INTRINS_PDEP_I64:
AddFunc2 (module, name, LLVMInt64Type (), LLVMInt64Type (), LLVMInt64Type ());
break;
MINI_OP(OP_POPCNT32, "popcnt32", IREG, IREG, NONE)
MINI_OP(OP_POPCNT64, "popcnt64", LREG, LREG, NONE)
+/* Intel BMI1 */
/* Count trailing zeroes, return 32/64 if the input is 0 */
MINI_OP(OP_CTTZ32, "cttz32", IREG, IREG, NONE)
MINI_OP(OP_CTTZ64, "cttz64", LREG, LREG, NONE)
-/* Intel BMI2 PEXT */
+
+/* Intel BMI2 */
+MINI_OP(OP_BZHI32, "bzhi32", IREG, IREG, IREG)
+MINI_OP(OP_BZHI64, "bzhi64", LREG, LREG, LREG)
MINI_OP(OP_PEXT32, "pext32", IREG, IREG, IREG)
MINI_OP(OP_PEXT64, "pext64", LREG, LREG, LREG)
-/* Intel BMI2 PDEP */
MINI_OP(OP_PDEP32, "pdep32", IREG, IREG, IREG)
MINI_OP(OP_PDEP64, "pdep64", LREG, LREG, LREG)
};
static guint16 bmi1_methods [] = {
+ SN_ExtractLowestSetBit, // NOTE(review): entries appear to be kept in name order; confirm whether lookup_intrins relies on that before reordering
+ SN_GetMaskUpToLowestSetBit,
+ SN_ResetLowestSetBit,
SN_TrailingZeroCount,
SN_get_IsSupported,
};
static guint16 bmi2_methods [] = {
SN_ParallelBitDeposit,
SN_ParallelBitExtract,
+ SN_ZeroHighBits, // NOTE(review): placed in apparent name order, ahead of get_IsSupported; confirm whether the lookup relies on that ordering
SN_get_IsSupported,
};
if (!COMPILE_LLVM (cfg))
return NULL;
id = lookup_intrins (bmi1_methods, sizeof (bmi1_methods), cmethod);
+
g_assert (id != -1);
supported = (get_cpu_features () & MONO_CPU_X86_BMI1) != 0;
is_64bit = !strcmp (class_name, "X64");
EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
ins->type = STACK_I4;
return ins;
+ case SN_GetMaskUpToLowestSetBit: {
+ // x ^ (x - 1): a mask of every bit up to and including the lowest set bit of x
+ // LLVM replaces it with `blsmsk`, so plain sub/xor IR is emitted instead of a dedicated opcode
+ int tmp_reg = alloc_preg (cfg); // receives x - 1
+ int result_reg = alloc_preg (cfg); // receives the final mask
+ EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1); // tmp = x - 1, long or int subtract depending on is_64bit
+ EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LXOR : OP_IXOR, result_reg, args [0]->dreg, tmp_reg); // result = x ^ tmp
+ return ins; // presumably the XOR instruction, i.e. whatever the last EMIT_NEW_BIALU stored in ins
+ }
+ case SN_ResetLowestSetBit: {
+ // x & (x - 1): x with its lowest set bit cleared
+ // LLVM replaces it with `blsr`, so plain sub/and IR is emitted instead of a dedicated opcode
+ int tmp_reg = alloc_preg (cfg); // receives x - 1
+ int result_reg = alloc_preg (cfg); // receives the final value
+ EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LSUB_IMM : OP_ISUB_IMM, tmp_reg, args [0]->dreg, 1); // tmp = x - 1, long or int subtract depending on is_64bit
+ EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg); // result = x & tmp
+ return ins; // presumably the AND instruction, i.e. whatever the last EMIT_NEW_BIALU stored in ins
+ }
+ case SN_ExtractLowestSetBit: {
+ // x & (0 - x): keeps only the lowest set bit of x
+ // LLVM replaces it with `blsi`, so plain sub/and IR is emitted instead of a dedicated opcode
+ int tmp_reg = alloc_preg (cfg); // receives 0 - x
+ int result_reg = alloc_preg (cfg); // receives the final value
+ int zero_reg = alloc_preg (cfg); // receives the constant 0
+ MONO_EMIT_NEW_ICONST (cfg, zero_reg, 0); // NOTE(review): ICONST is used for the zero even when is_64bit; confirm it is widened correctly as an OP_LSUB operand
+ EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LSUB : OP_ISUB, tmp_reg, zero_reg, args [0]->dreg); // tmp = 0 - x
+ EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, args [0]->dreg, tmp_reg); // result = x & tmp
+ return ins; // presumably the AND instruction, i.e. whatever the last EMIT_NEW_BIALU stored in ins
+ }
case SN_TrailingZeroCount:
MONO_INST_NEW (cfg, ins, is_64bit ? OP_CTTZ64 : OP_CTTZ32);
ins->dreg = alloc_ireg (cfg);
EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
ins->type = STACK_I4;
return ins;
+ case SN_ZeroHighBits: // emits a single OP_BZHI32/OP_BZHI64 instruction taking both intrinsic arguments
+ MONO_INST_NEW (cfg, ins, is_64bit ? OP_BZHI64 : OP_BZHI32); // opcode width follows is_64bit
+ ins->dreg = alloc_ireg (cfg); // new register for the result
+ ins->sreg1 = args [0]->dreg; // first argument of the intrinsic call
+ ins->sreg2 = args [1]->dreg; // second argument of the intrinsic call
+ ins->type = is_64bit ? STACK_I8 : STACK_I4; // stack type of the result matches the operand width
+ MONO_ADD_INS (cfg->cbb, ins); // add the instruction to cfg->cbb
+ return ins;
case SN_ParallelBitExtract:
MONO_INST_NEW (cfg, ins, is_64bit ? OP_PEXT64 : OP_PEXT32);
ins->dreg = alloc_ireg (cfg);
// BMI1
//METHOD(AndNot)
//METHOD(BitFieldExtract)
-//METHOD(SIMDExtractLowestSetBit)
-//METHOD(GetMaskUpToLowestSetBit)
-//METHOD(ResetLowestSetBit)
+METHOD(ExtractLowestSetBit)
+METHOD(GetMaskUpToLowestSetBit)
+METHOD(ResetLowestSetBit)
METHOD(TrailingZeroCount)
// BMI2
-//METHOD(ZeroHighBits)
+METHOD(ZeroHighBits)
//METHOD(MultiplyNoFlags)
METHOD(ParallelBitDeposit)
METHOD(ParallelBitExtract)