it there. Therefore we always define the variable. */
bool have_bmi1;
+#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
+static bool have_bmi2;
+#else
+# define have_bmi2 0
+#endif
+
static uint8_t *tb_ret_addr;
static void patch_reloc(uint8_t *code_ptr, int type,
tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
break;
case 'c':
+ case_c:
ct->ct |= TCG_CT_REG;
tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
break;
tcg_regset_set32(ct->u.regs, 0, 0xf);
break;
case 'r':
+ case_r:
ct->ct |= TCG_CT_REG;
if (TCG_TARGET_REG_BITS == 64) {
tcg_regset_set32(ct->u.regs, 0, 0xffff);
tcg_regset_set32(ct->u.regs, 0, 0xff);
}
break;
+ case 'C':
+ /* With SHRX et al, we need not use ECX as shift count register. */
+ if (have_bmi2) {
+ goto case_r;
+ } else {
+ goto case_c;
+ }
/* qemu_ld/st address constraint */
case 'L':
# define P_REXB_RM 0
# define P_GS 0
#endif
+#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
+#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
#define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83)
#define OPC_SHIFT_1 (0xd1)
#define OPC_SHIFT_Ib (0xc1)
#define OPC_SHIFT_cl (0xd3)
+#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
+#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
+#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL (0x85)
#define OPC_XCHG_ax_r32 (0x90)
tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
}
- tmp |= (opc & P_DATA16 ? 1 : 0); /* VEX.pp */
+ /* VEX.pp */
+ if (opc & P_DATA16) {
+ tmp |= 1; /* 0x66 */
+ } else if (opc & P_SIMDF3) {
+ tmp |= 2; /* 0xf3 */
+ } else if (opc & P_SIMDF2) {
+ tmp |= 3; /* 0xf2 */
+ }
tmp |= (~v & 15) << 3; /* VEX.vvvv */
tcg_out8(s, tmp);
tcg_out8(s, opc);
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
- int c, rexw = 0;
+ int c, vexop, rexw = 0;
#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
OP_32_64(shl):
c = SHIFT_SHL;
- goto gen_shift;
+ vexop = OPC_SHLX;
+ goto gen_shift_maybe_vex;
OP_32_64(shr):
c = SHIFT_SHR;
- goto gen_shift;
+ vexop = OPC_SHRX;
+ goto gen_shift_maybe_vex;
OP_32_64(sar):
c = SHIFT_SAR;
- goto gen_shift;
+ vexop = OPC_SARX;
+ goto gen_shift_maybe_vex;
OP_32_64(rotl):
c = SHIFT_ROL;
goto gen_shift;
OP_32_64(rotr):
c = SHIFT_ROR;
goto gen_shift;
+ gen_shift_maybe_vex:
+ if (have_bmi2 && !const_args[2]) {
+ tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
+ break;
+ }
+ /* FALLTHRU */
gen_shift:
if (const_args[2]) {
tcg_out_shifti(s, c + rexw, args[0], args[2]);
{ INDEX_op_xor_i32, { "r", "0", "ri" } },
{ INDEX_op_andc_i32, { "r", "r", "ri" } },
- { INDEX_op_shl_i32, { "r", "0", "ci" } },
- { INDEX_op_shr_i32, { "r", "0", "ci" } },
- { INDEX_op_sar_i32, { "r", "0", "ci" } },
+ { INDEX_op_shl_i32, { "r", "0", "Ci" } },
+ { INDEX_op_shr_i32, { "r", "0", "Ci" } },
+ { INDEX_op_sar_i32, { "r", "0", "Ci" } },
{ INDEX_op_rotl_i32, { "r", "0", "ci" } },
{ INDEX_op_rotr_i32, { "r", "0", "ci" } },
{ INDEX_op_xor_i64, { "r", "0", "re" } },
{ INDEX_op_andc_i64, { "r", "r", "rI" } },
- { INDEX_op_shl_i64, { "r", "0", "ci" } },
- { INDEX_op_shr_i64, { "r", "0", "ci" } },
- { INDEX_op_sar_i64, { "r", "0", "ci" } },
+ { INDEX_op_shl_i64, { "r", "0", "Ci" } },
+ { INDEX_op_shr_i64, { "r", "0", "Ci" } },
+ { INDEX_op_sar_i64, { "r", "0", "Ci" } },
{ INDEX_op_rotl_i64, { "r", "0", "ci" } },
{ INDEX_op_rotr_i64, { "r", "0", "ci" } },
__cpuid_count(7, 0, a, b, c, d);
#ifdef bit_BMI
have_bmi1 = (b & bit_BMI) != 0;
+#endif
+#ifndef have_bmi2
+ have_bmi2 = (b & bit_BMI2) != 0;
#endif
}