INTRINS_PDEP_I64,
INTRINS_BZHI_I32,
INTRINS_BZHI_I64,
+ INTRINS_BEXTR_I32,
+ INTRINS_BEXTR_I64,
#if defined(TARGET_AMD64) || defined(TARGET_X86)
INTRINS_SSE_PMOVMSKB,
INTRINS_SSE_PSRLI_W,
values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_CTTZ32 ? INTRINS_CTTZ_I32 : INTRINS_CTTZ_I64), args, 2, "");
break;
}
+ case OP_BEXTR32:
+ case OP_BEXTR64: {
+ LLVMValueRef args [2];
+ args [0] = lhs;
+ args [1] = convert (ctx, rhs, ins->opcode == OP_BEXTR32 ? LLVMInt32Type () : LLVMInt64Type ()); // cast ushort to u32/u64
+ values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_BEXTR32 ? INTRINS_BEXTR_I32 : INTRINS_BEXTR_I64), args, 2, "");
+ break;
+ }
case OP_BZHI32:
case OP_BZHI64: {
LLVMValueRef args [2];
{INTRINS_CTTZ_I64, "llvm.cttz.i64"},
{INTRINS_BZHI_I32, "llvm.x86.bmi.bzhi.32"},
{INTRINS_BZHI_I64, "llvm.x86.bmi.bzhi.64"},
+ {INTRINS_BEXTR_I32, "llvm.x86.bmi.bextr.32"},
+ {INTRINS_BEXTR_I64, "llvm.x86.bmi.bextr.64"},
{INTRINS_PEXT_I32, "llvm.x86.bmi.pext.32"},
{INTRINS_PEXT_I64, "llvm.x86.bmi.pext.64"},
{INTRINS_PDEP_I32, "llvm.x86.bmi.pdep.32"},
case INTRINS_CTTZ_I64:
AddFunc2 (module, name, LLVMInt64Type (), LLVMInt64Type (), LLVMInt1Type ());
break;
+ case INTRINS_BEXTR_I32:
case INTRINS_BZHI_I32:
case INTRINS_PEXT_I32:
case INTRINS_PDEP_I32:
AddFunc2 (module, name, LLVMInt32Type (), LLVMInt32Type (), LLVMInt32Type ());
break;
+ case INTRINS_BEXTR_I64:
case INTRINS_BZHI_I64:
case INTRINS_PEXT_I64:
case INTRINS_PDEP_I64:
/* inst_c0 is the rounding mode: 0 = round, 1 = floor, 2 = ceiling */
MINI_OP(OP_SSE41_ROUNDPD, "roundpd", XREG, XREG, NONE)
+/* Intel BMI1 */
+/* Count trailing zeroes, return 32/64 if the input is 0 */
+MINI_OP(OP_CTTZ32, "cttz32", IREG, IREG, NONE)
+MINI_OP(OP_CTTZ64, "cttz64", LREG, LREG, NONE)
+MINI_OP(OP_BEXTR32, "bextr32", IREG, IREG, IREG)
+MINI_OP(OP_BEXTR64, "bextr64", LREG, LREG, LREG)
+
+/* Intel BMI2 */
+MINI_OP(OP_BZHI32, "bzhi32", IREG, IREG, IREG)
+MINI_OP(OP_BZHI64, "bzhi64", LREG, LREG, LREG)
+MINI_OP(OP_PEXT32, "pext32", IREG, IREG, IREG)
+MINI_OP(OP_PEXT64, "pext64", LREG, LREG, LREG)
+MINI_OP(OP_PDEP32, "pdep32", IREG, IREG, IREG)
+MINI_OP(OP_PDEP64, "pdep64", LREG, LREG, LREG)
+
#endif
MINI_OP(OP_XMOVE, "xmove", XREG, XREG, NONE)
MINI_OP(OP_POPCNT32, "popcnt32", IREG, IREG, NONE)
MINI_OP(OP_POPCNT64, "popcnt64", LREG, LREG, NONE)
-/* Intel BMI1 */
-/* Count trailing zeroes, return 32/64 if the input is 0 */
-MINI_OP(OP_CTTZ32, "cttz32", IREG, IREG, NONE)
-MINI_OP(OP_CTTZ64, "cttz64", LREG, LREG, NONE)
-
-/* Intel BMI2 */
-MINI_OP(OP_BZHI32, "bzhi32", IREG, IREG, IREG)
-MINI_OP(OP_BZHI64, "bzhi64", LREG, LREG, LREG)
-MINI_OP(OP_PEXT32, "pext32", IREG, IREG, IREG)
-MINI_OP(OP_PEXT64, "pext64", LREG, LREG, LREG)
-MINI_OP(OP_PDEP32, "pdep32", IREG, IREG, IREG)
-MINI_OP(OP_PDEP64, "pdep64", LREG, LREG, LREG)
};
static guint16 bmi1_methods [] = {
+ SN_AndNot,
+ SN_BitFieldExtract,
SN_ExtractLowestSetBit,
SN_GetMaskUpToLowestSetBit,
SN_ResetLowestSetBit,
};
static guint16 bmi2_methods [] = {
+ //SN_MultiplyNoFlags,
SN_ParallelBitDeposit,
SN_ParallelBitExtract,
SN_ZeroHighBits,
}
}
if (!strcmp (class_name, "Bmi1") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi1"))) {
- // We only support the subset used by corelib
- if (m_class_get_image (cfg->method->klass) != mono_get_corlib ())
- return NULL;
if (!COMPILE_LLVM (cfg))
return NULL;
id = lookup_intrins (bmi1_methods, sizeof (bmi1_methods), cmethod);
EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
ins->type = STACK_I4;
return ins;
+ case SN_AndNot: {
+ // (a ^ -1) & b
+ // LLVM replaces it with `andn`
+ int tmp_reg = alloc_preg (cfg);
+ int result_reg = alloc_preg (cfg);
+ EMIT_NEW_BIALU_IMM (cfg, ins, is_64bit ? OP_LXOR_IMM : OP_IXOR_IMM, tmp_reg, args [0]->dreg, -1);
+ EMIT_NEW_BIALU (cfg, ins, is_64bit ? OP_LAND : OP_IAND, result_reg, tmp_reg, args [1]->dreg);
+ return ins;
+ }
+	case SN_BitFieldExtract: {
+		// BEXTR: the 2-arg overload takes a packed control word
+		// (start bit in bits 0-7, length in bits 8-15).
+		if (fsig->param_count == 2) {
+			MONO_INST_NEW (cfg, ins, is_64bit ? OP_BEXTR64 : OP_BEXTR32);
+			ins->dreg = alloc_ireg (cfg);
+			ins->sreg1 = args [0]->dreg;
+			ins->sreg2 = args [1]->dreg;
+			ins->type = is_64bit ? STACK_I8 : STACK_I4;
+			MONO_ADD_INS (cfg->cbb, ins);
+			return ins;
+		}
+		// 3-arg overload (value, start, length) is not emitted here;
+		// return NULL so the managed fallback is used instead of
+		// falling through into the next case.
+		return NULL;
+	}
case SN_GetMaskUpToLowestSetBit: {
// x ^ (x - 1)
// LLVM replaces it with `blsmsk`
}
}
if (!strcmp (class_name, "Bmi2") || (!strcmp (class_name, "X64") && cmethod->klass->nested_in && !strcmp (m_class_get_name (cmethod->klass->nested_in), "Bmi2"))) {
- // We only support the subset used by corelib
+ // We only support the subset used by corelib. Remove this check once MultiplyNoFlags is implemented.
if (m_class_get_image (cfg->method->klass) != mono_get_corlib ())
return NULL;
if (!COMPILE_LLVM (cfg))
EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
ins->type = STACK_I4;
return ins;
- case SN_ZeroHighBits:
+ //case SN_MultiplyNoFlags:
+ //// TODO: implement using _mulx_u32/u64:
+ //// ulong MultiplyNoFlags(ulong left, ulong right)
+ //// ulong MultiplyNoFlags(ulong left, ulong right, ulong* low) => MultiplyNoFlags(left, right, low);
+ //// uint MultiplyNoFlags(uint left, uint right)
+ //// uint MultiplyNoFlags(uint left, uint right, uint* low)
+ //return NULL;
+ //case SN_ZeroHighBits:
MONO_INST_NEW (cfg, ins, is_64bit ? OP_BZHI64 : OP_BZHI32);
ins->dreg = alloc_ireg (cfg);
ins->sreg1 = args [0]->dreg;
ins->dreg = alloc_ireg (cfg);
ins->sreg1 = args [0]->dreg;
ins->sreg2 = args [1]->dreg;
- ins->type = STACK_I4;
+ ins->type = is_64bit ? STACK_I8 : STACK_I4;
MONO_ADD_INS (cfg->cbb, ins);
return ins;
case SN_ParallelBitDeposit:
ins->dreg = alloc_ireg (cfg);
ins->sreg1 = args [0]->dreg;
ins->sreg2 = args [1]->dreg;
- ins->type = STACK_I4;
+ ins->type = is_64bit ? STACK_I8 : STACK_I4;
MONO_ADD_INS (cfg->cbb, ins);
return ins;
default:
g_assert_not_reached ();
}
- //printf ("%s %s\n", mono_method_get_full_name (cfg->method), mono_method_get_full_name (cmethod));
}
return NULL;
METHOD(op_Multiply)
METHOD(op_Subtraction)
// BMI1
-//METHOD(AndNot)
-//METHOD(BitFieldExtract)
+METHOD(AndNot)
+METHOD(BitFieldExtract)
METHOD(ExtractLowestSetBit)
METHOD(GetMaskUpToLowestSetBit)
METHOD(ResetLowestSetBit)