;;
--enable-profiler) profiler="yes"
;;
+ --enable-tcg-x86-opt) tcg_x86_opt="yes"
+ ;;
--enable-cocoa)
cocoa="yes" ;
sdl="no" ;
echo "sparse enabled $sparse"
echo "strip binaries $strip_opt"
echo "profiler $profiler"
+echo "TCG optimization $tcg_x86_opt"
echo "static build $static"
echo "-Werror enabled $werror"
if test "$darwin" = "yes" ; then
if test $profiler = "yes" ; then
echo "CONFIG_PROFILER=y" >> $config_host_mak
fi
+if test "$tcg_x86_opt" = "yes" ; then
+ echo "CONFIG_TCG_TARGET_X86_OPT=y" >> $config_host_mak
+fi
if test "$slirp" = "yes" ; then
echo "CONFIG_SLIRP=y" >> $config_host_mak
QEMU_INCLUDES="-I\$(SRC_PATH)/slirp $QEMU_INCLUDES"
void *puc);
void cpu_resume_from_signal(CPUState *env1, void *puc);
void cpu_io_recompile(CPUState *env, void *retaddr);
-TranslationBlock *tb_gen_code(CPUState *env,
+TranslationBlock *tb_gen_code(CPUState *env,
target_ulong pc, target_ulong cs_base, int flags,
int cflags);
void cpu_exec_init(CPUState *env);
struct TranslationBlock *jmp_next[2];
struct TranslationBlock *jmp_first;
uint32_t icount;
+#ifdef CONFIG_EXEC_PROFILE
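+ /* TB execution counters, indexed by goto_tb slot (tb_num & 1) */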
+ uint32_t tbexec_count[2];
+#endif
};
static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
+ env1->tlb_table[mmu_idx][page_index].addend;
return qemu_ram_addr_from_host_nofail(p);
}
+
+#if defined(CONFIG_TCG_TARGET_X86_OPT)
+/* extended versions of MMU helpers for x86 TCG target optimization */
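+/*
+ * Unlike the standard __ld/__st helpers, these take the return address
+ * as an explicit argument, so they can be called from the slow-path
+ * stubs emitted at the end of a TB.
+ */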
+uint8_t REGPARM __ldextb_mmu(target_ulong addr, int mmu_idx, void *ra);
+void REGPARM __stextb_mmu(target_ulong addr, uint8_t val, int mmu_idx, void *ra);
+uint16_t REGPARM __ldextw_mmu(target_ulong addr, int mmu_idx, void *ra);
+void REGPARM __stextw_mmu(target_ulong addr, uint16_t val, int mmu_idx, void *ra);
+uint32_t REGPARM __ldextl_mmu(target_ulong addr, int mmu_idx, void *ra);
+void REGPARM __stextl_mmu(target_ulong addr, uint32_t val, int mmu_idx, void *ra);
+uint64_t REGPARM __ldextq_mmu(target_ulong addr, int mmu_idx, void *ra);
+void REGPARM __stextq_mmu(target_ulong addr, uint64_t val, int mmu_idx, void *ra);
+#endif /* CONFIG_TCG_TARGET_X86_OPT */
#endif
typedef void (CPUDebugExcpHandler)(CPUState *env);
--enable-mixemu \
--disable-vnc-tls \
--extra-ldflags="-lv4l2 -lv4lconvert"
+# --enable-tcg-x86-opt \
+# --enable-debug \
#--enable-profiler \
# --enable-gles2 --gles2dir=/usr
;;
return res;
}
+#if defined(CONFIG_TCG_TARGET_X86_OPT) && !defined(SOFTMMU_CODE_ACCESS)
+/*
+ * extended versions of MMU helpers for x86 TCG target optimization
+ * !defined(SOFTMMU_CODE_ACCESS) suppresses warnings from exec.c
+ */
+DATA_TYPE REGPARM glue(glue(__ldext, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ int mmu_idx,
+ void *ra)
+{
+ DATA_TYPE res;
+ int index;
+ target_ulong tlb_addr;
+ target_phys_addr_t ioaddr;
+ unsigned long addend;
+ void *retaddr;
+
+ /* test if there is a match for unaligned or IO access */
+ /* XXX: could be done in the memory macros in a non-portable way */
+ index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+ tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+ if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (tlb_addr & ~TARGET_PAGE_MASK) {
+ /* IO access */
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ goto do_unaligned_access;
+ retaddr = ra;
+ ioaddr = env->iotlb[mmu_idx][index];
+ res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+ } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+ /* slow unaligned access (it spans two pages or IO) */
+ do_unaligned_access:
+ retaddr = ra;
+#ifdef ALIGNED_ONLY
+ do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+#endif
+ res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr,
+ mmu_idx, retaddr);
+ } else {
+ /* unaligned/aligned access in the same page */
+#ifdef ALIGNED_ONLY
+ if ((addr & (DATA_SIZE - 1)) != 0) {
+ retaddr = ra;
+ do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ }
+#endif
+ addend = env->tlb_table[mmu_idx][index].addend;
+ res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(long)(addr+addend));
+ }
+ } else {
+ /* the page is not in the TLB : fill it */
+ retaddr = ra;
+#ifdef ALIGNED_ONLY
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+#endif
+ tlb_fill(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ goto redo;
+ }
+ return res;
+}
+#endif /* CONFIG_TCG_TARGET_X86_OPT */
+
/* handle all unaligned cases */
static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
int mmu_idx,
}
}
+#if defined(CONFIG_TCG_TARGET_X86_OPT)
+/*
+ * extended versions of MMU helpers for x86 TCG target optimization
+ * (the store helpers are compiled only when SOFTMMU_CODE_ACCESS is undefined)
+ */
+void REGPARM glue(glue(__stext, SUFFIX), MMUSUFFIX)(target_ulong addr,
+ DATA_TYPE val,
+ int mmu_idx,
+ void *ra)
+{
+ target_phys_addr_t ioaddr;
+ unsigned long addend;
+ target_ulong tlb_addr;
+ void *retaddr;
+ int index;
+
+ index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ redo:
+ tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (tlb_addr & ~TARGET_PAGE_MASK) {
+ /* IO access */
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ goto do_unaligned_access;
+ retaddr = ra;
+ ioaddr = env->iotlb[mmu_idx][index];
+ glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+ } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
+ do_unaligned_access:
+ retaddr = ra;
+#ifdef ALIGNED_ONLY
+ do_unaligned_access(addr, 1, mmu_idx, retaddr);
+#endif
+ glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
+ mmu_idx, retaddr);
+ } else {
+ /* aligned/unaligned access in the same page */
+#ifdef ALIGNED_ONLY
+ if ((addr & (DATA_SIZE - 1)) != 0) {
+ retaddr = ra;
+ do_unaligned_access(addr, 1, mmu_idx, retaddr);
+ }
+#endif
+ addend = env->tlb_table[mmu_idx][index].addend;
+ glue(glue(st, SUFFIX), _raw)((uint8_t *)(long)(addr+addend), val);
+ }
+ } else {
+ /* the page is not in the TLB : fill it */
+ retaddr = ra;
+#ifdef ALIGNED_ONLY
+ if ((addr & (DATA_SIZE - 1)) != 0)
+ do_unaligned_access(addr, 1, mmu_idx, retaddr);
+#endif
+ tlb_fill(addr, 1, mmu_idx, retaddr);
+ goto redo;
+ }
+}
+#endif /* CONFIG_TCG_TARGET_X86_OPT */
+
/* handles all unaligned cases */
static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(target_ulong addr,
DATA_TYPE val,
tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
}
#endif
-
+
static void gen_add_A0_im(DisasContext *s, int val)
{
#ifdef TARGET_X86_64
}
}
-static inline void gen_op_movl_T0_Dshift(int ot)
+static inline void gen_op_movl_T0_Dshift(int ot)
{
tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
case CC_OP_SUBW:
case CC_OP_SUBL:
case CC_OP_SUBQ:
-
+
size = cc_op - CC_OP_SUBB;
switch(jcc_op) {
case JCC_Z:
switch(size) {
case 0:
tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
0, l1);
break;
case 1:
tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
0, l1);
break;
#ifdef TARGET_X86_64
case 2:
tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
0, l1);
break;
#endif
default:
- tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
+ tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
0, l1);
break;
}
break;
-
+
case JCC_B:
cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
goto fast_jcc_b;
}
tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
break;
-
+
case JCC_L:
cond = inv ? TCG_COND_GE : TCG_COND_LT;
goto fast_jcc_l;
}
tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
break;
-
+
default:
goto slow_jcc;
}
break;
-
+
/* some jumps are easy to compute */
case CC_OP_ADDB:
case CC_OP_ADDW:
case CC_OP_ADDL:
case CC_OP_ADDQ:
-
+
case CC_OP_ADCB:
case CC_OP_ADCW:
case CC_OP_ADCL:
case CC_OP_ADCQ:
-
+
case CC_OP_SBBB:
case CC_OP_SBBW:
case CC_OP_SBBL:
case CC_OP_SBBQ:
-
+
case CC_OP_LOGICB:
case CC_OP_LOGICW:
case CC_OP_LOGICL:
case CC_OP_LOGICQ:
-
+
case CC_OP_INCB:
case CC_OP_INCW:
case CC_OP_INCL:
case CC_OP_INCQ:
-
+
case CC_OP_DECB:
case CC_OP_DECW:
case CC_OP_DECL:
case CC_OP_DECQ:
-
+
case CC_OP_SHLB:
case CC_OP_SHLW:
case CC_OP_SHLL:
case CC_OP_SHLQ:
-
+
case CC_OP_SARB:
case CC_OP_SARW:
case CC_OP_SARL:
default:
slow_jcc:
gen_setcc_slow_T0(s, jcc_op);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
+ tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
cpu_T[0], 0, l1);
break;
}
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
-static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
+static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
int is_right, int is_arith)
{
target_ulong mask;
gen_op_st_T0_A0(ot + s->mem_index);
else
gen_op_mov_reg_T0(ot, op1);
-
+
/* update eflags if non zero shift */
if (s->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s->cc_op);
tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
else
tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
-
+
gen_set_label(shift_label);
s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
int is_right, int is_arith)
{
int mask;
-
+
if (ot == OT_QUAD)
mask = 0x3f;
else
gen_op_st_T0_A0(ot + s->mem_index);
else
gen_op_mov_reg_T0(ot, op1);
-
+
/* update eflags if non zero shift */
if (op2 != 0) {
tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
tcg_gen_shri_tl(ret, arg1, -arg2);
}
-static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
int is_right)
{
target_ulong mask;
shifts. */
label1 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
-
+
if (ot <= OT_WORD)
tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
else
tcg_gen_mov_tl(cpu_tmp0, t1);
-
+
gen_extu(ot, t0);
tcg_gen_mov_tl(t2, t0);
} else {
gen_op_mov_reg_v(ot, op1, t0);
}
-
+
/* update eflags */
if (s->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s->cc_op);
}
tcg_gen_andi_tl(t0, t0, CC_C);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
+
tcg_gen_discard_tl(cpu_cc_dst);
tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-
+
gen_set_label(label2);
s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
}
/* XXX: add faster immediate = 1 case */
-static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
+static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
int is_right)
{
int label1;
gen_op_ld_T0_A0(ot + s->mem_index);
else
gen_op_mov_TN_reg(ot, 0, op1);
-
+
if (is_right) {
switch (ot) {
case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
tcg_gen_discard_tl(cpu_cc_dst);
tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-
+
gen_set_label(label1);
s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
}
/* XXX: add faster immediate case */
-static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
+static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1,
int is_right)
{
int label1, label2, data_bits;
shifts. */
label1 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
-
+
tcg_gen_addi_tl(cpu_tmp5, t2, -1);
if (ot == OT_WORD) {
/* Note: we implement the Intel behaviour for shift count > 16 */
tcg_gen_ext32u_tl(t0, t0);
tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
-
+
/* only needed if count > 16, but a test would complicate */
tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
tcg_gen_shli_tl(t1, t1, 16);
tcg_gen_or_tl(t1, t1, t0);
tcg_gen_ext32u_tl(t1, t1);
-
+
tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5);
tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0);
tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
tcg_gen_shl_tl(t1, t1, cpu_tmp5);
tcg_gen_or_tl(t0, t0, t1);
-
+
} else {
if (ot == OT_LONG)
tcg_gen_ext32u_tl(t1, t1);
tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
-
+
tcg_gen_shl_tl(t0, t0, t2);
tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
tcg_gen_shr_tl(t1, t1, cpu_tmp5);
} else {
gen_op_mov_reg_v(ot, op1, t0);
}
-
+
/* update eflags */
if (s->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s->cc_op);
return 4;
}
+#ifdef CONFIG_EXEC_PROFILE
+/* generate code that profiles TB execution */
+static inline void gen_prof_tbexec(DisasContext *s, int tb_num)
+{
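+ /* emit code to increment tb->tbexec_count[tb_num & 1]: load the
+ counter's address as an immediate, then load, add 1 and store */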
+ tcg_gen_movi_tl(cpu_T[0], (target_ulong)((void *)s->tb +
+ offsetof(TranslationBlock, tbexec_count) +
+ sizeof (uint32_t) * (tb_num & 0x1)));
+ tcg_gen_ld32s_tl(cpu_T[1], cpu_T[0], 0);
+ tcg_gen_addi_i32(cpu_T[1], cpu_T[1], 1);
+ tcg_gen_st32_tl(cpu_T[1], cpu_T[0], 0);
+}
+#else
+# define gen_prof_tbexec(s, tb_num)
+#endif /* CONFIG_EXEC_PROFILE */
+
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
TranslationBlock *tb;
/* NOTE: we handle the case where the TB spans two pages here */
if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
(pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) {
+ /* profile TB execution */
+ gen_prof_tbexec(s, tb_num);
/* jump to same page: we can use a direct jump */
tcg_gen_goto_tb(tb_num);
gen_jmp_im(eip);
if (s->jmp_opt) {
l1 = gen_new_label();
gen_jcc1(s, cc_op, b, l1);
-
+
gen_goto_tb(s, 0, next_eip);
gen_set_label(l1);
static inline void gen_op_movl_T0_seg(int seg_reg)
{
- tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,segs[seg_reg].selector));
}
static inline void gen_op_movl_seg_T0_vm(int seg_reg)
{
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
- tcg_gen_st32_tl(cpu_T[0], cpu_env,
+ tcg_gen_st32_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,segs[seg_reg].selector));
tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
- tcg_gen_st_tl(cpu_T[0], cpu_env,
+ tcg_gen_st_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,segs[seg_reg].base));
}
gen_op_set_cc_op(s->cc_op);
gen_jmp_im(cur_eip);
//gen_heler_test_interrupt();
- gen_helper_raise_interrupt(tcg_const_i32(intno),
+ gen_helper_raise_interrupt(tcg_const_i32(intno),
tcg_const_i32(next_eip - cur_eip));
s->is_jmp = DISAS_TB_JUMP;
}
} else if (s->tf) {
gen_helper_single_step();
} else {
+ gen_prof_tbexec(s, 0);
tcg_gen_exit_tb(0);
}
s->is_jmp = DISAS_TB_JUMP;
#endif
{
gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
- tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
offsetof(CPUX86State,fpregs[reg].mmx));
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
#ifdef TARGET_X86_64
if (s->dflag == 2) {
gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
- tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
offsetof(CPUX86State,xmm_regs[reg]));
gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
} else
#endif
{
gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
- tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
offsetof(CPUX86State,xmm_regs[reg]));
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
case 0x7e: /* movd ea, mm */
#ifdef TARGET_X86_64
if (s->dflag == 2) {
- tcg_gen_ld_i64(cpu_T[0], cpu_env,
+ tcg_gen_ld_i64(cpu_T[0], cpu_env,
offsetof(CPUX86State,fpregs[reg].mmx));
gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
} else
#endif
{
- tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
}
case 0x17e: /* movd ea, xmm */
#ifdef TARGET_X86_64
if (s->dflag == 2) {
- tcg_gen_ld_i64(cpu_T[0], cpu_env,
+ tcg_gen_ld_i64(cpu_T[0], cpu_env,
offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
} else
#endif
{
- tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+ tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
}
break;
case 0x050: /* movmskps */
rm = (modrm & 7) | REX_B(s);
- tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
offsetof(CPUX86State,xmm_regs[rm]));
gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
break;
case 0x150: /* movmskpd */
rm = (modrm & 7) | REX_B(s);
- tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
offsetof(CPUX86State,xmm_regs[rm]));
gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
gen_jmp_im(pc_start - s->cs_base);
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
- tcg_const_i32(dflag),
+ tcg_const_i32(dflag),
tcg_const_i32(s->pc - pc_start));
} else {
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
- tcg_const_i32(dflag),
+ tcg_const_i32(dflag),
tcg_const_i32(s->pc - s->cs_base));
}
gen_eob(s);
gen_lea_modrm(s, modrm, ®_addr, &offset_addr);
gen_helper_cmpxchg16b(cpu_A0);
} else
-#endif
+#endif
{
if (!(s->cpuid_features & CPUID_CX8))
goto illegal_op;
gen_helper_fildl_FT0(cpu_tmp2_i32);
break;
case 2:
- tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
(s->mem_index >> 2) - 1);
gen_helper_fldl_FT0(cpu_tmp1_i64);
break;
gen_helper_fildl_ST0(cpu_tmp2_i32);
break;
case 2:
- tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
(s->mem_index >> 2) - 1);
gen_helper_fldl_ST0(cpu_tmp1_i64);
break;
break;
case 2:
gen_helper_fisttll_ST0(cpu_tmp1_i64);
- tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
(s->mem_index >> 2) - 1);
break;
case 3:
break;
case 2:
gen_helper_fstl_ST0(cpu_tmp1_i64);
- tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
(s->mem_index >> 2) - 1);
break;
case 3:
gen_helper_fpop();
break;
case 0x3d: /* fildll */
- tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+ tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
(s->mem_index >> 2) - 1);
gen_helper_fildll_ST0(cpu_tmp1_i64);
break;
case 0x3f: /* fistpll */
gen_helper_fistll_ST0(cpu_tmp1_i64);
- tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+ tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
(s->mem_index >> 2) - 1);
gen_helper_fpop();
break;
ot = dflag ? OT_LONG : OT_WORD;
gen_op_mov_TN_reg(OT_WORD, 0, R_EDX);
gen_op_andl_T0_ffff();
- gen_check_io(s, ot, pc_start - s->cs_base,
+ gen_check_io(s, ot, pc_start - s->cs_base,
SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
if (s->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s->cc_op);
gen_jmp_im(pc_start - s->cs_base);
- gen_helper_iret_protected(tcg_const_i32(s->dflag),
+ gen_helper_iret_protected(tcg_const_i32(s->dflag),
tcg_const_i32(s->pc - s->cs_base));
s->cc_op = CC_OP_EFLAGS;
}
break;
case 4: /* STGI */
if ((!(s->flags & HF_SVME_MASK) &&
- !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+ !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
!s->pe)
goto illegal_op;
if (s->cpl != 0) {
}
break;
case 6: /* SKINIT */
- if ((!(s->flags & HF_SVME_MASK) &&
- !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+ if ((!(s->flags & HF_SVME_MASK) &&
+ !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
!s->pe)
goto illegal_op;
gen_helper_skinit();
}
}
+#if !defined(CONFIG_TCG_TARGET_X86_OPT)
+/* only compiled when the _opt variants are not used, to avoid unused-function warnings */
+
/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
EAX. It will be useful once fixed registers globals are less
common. */
}
#endif
}
+#endif /* !CONFIG_TCG_TARGET_X86_OPT */
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
int base, tcg_target_long ofs, int sizeop)
}
}
+#if !defined(CONFIG_TCG_TARGET_X86_OPT)
+/* only compiled when the _opt variants are not used, to avoid unused-function warnings */
+
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
int opc)
{
}
#endif
}
+#endif /* !CONFIG_TCG_TARGET_X86_OPT */
+
+#if defined(CONFIG_TCG_TARGET_X86_OPT)
+/* optimization to reduce jump overheads */
+
+/* extended versions of MMU helpers */
+static void *qemu_ldext_helpers[4] = {
+ __ldextb_mmu,
+ __ldextw_mmu,
+ __ldextl_mmu,
+ __ldextq_mmu,
+};
+static void *qemu_stext_helpers[4] = {
+ __stextb_mmu,
+ __stextw_mmu,
+ __stextl_mmu,
+ __stextq_mmu,
+};
+
+/*
+ * the qemu_ld/st code generators call add_helper_label() so that the
+ * slow case (TLB miss or I/O access) is handled at the end of the TB
+ */
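+/*
+ * resulting code layout:
+ *
+ *   fast path (inline)                slow path (end of TB)
+ *     TLB compare                       call __ldext/__stext helper
+ *     jne slow_path   --------------->  (TLB fill or I/O access)
+ *     access via tlb addend <---------  jmp back to the fast path
+ */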
+static void add_helper_label(TCGContext *s,
+ int opc_ext,
+ int data_reg,
+ int data_reg2,
+ int addrlo_reg,
+ int addrhi_reg,
+ int mem_index,
+ uint8_t *raddr,
+ uint32_t **label_ptr)
+{
+ int idx;
+ HelperLabel *label;
+
+ if (s->nb_helper_labels >= TCG_MAX_HELPER_LABELS)
+ tcg_abort();
+
+ idx = s->nb_helper_labels++;
+ label = (HelperLabel *)&s->helper_labels[idx];
+ label->opc_ext = opc_ext;
+ label->datalo_reg = data_reg;
+ label->datahi_reg = data_reg2;
+ label->addrlo_reg = addrlo_reg;
+ label->addrhi_reg = addrhi_reg;
+ label->mem_index = mem_index;
+ label->raddr = raddr;
+ if (!label_ptr) {
+ tcg_abort();
+ }
+ label->label_ptr[0] = label_ptr[0];
+ label->label_ptr[1] = label_ptr[1];
+}
+
+/* generates the slow case of qemu_ld at the end of the TB */
+static void tcg_out_qemu_ld_helper_call(TCGContext *s, HelperLabel *label)
+{
+ int s_bits, arg_idx;
+ int opc = label->opc_ext & HL_OPC_MASK;
+ int mem_index = label->mem_index;
+ int data_reg = label->datalo_reg;
+ int data_reg2 = label->datahi_reg;
+ int addrhi_reg = label->addrhi_reg;
+ uint8_t *raddr = label->raddr;
+ uint32_t **label_ptr = &label->label_ptr[0];
+
+ s_bits = opc & 3;
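+ /* the low two bits of opc encode the access size (1 << s_bits bytes);
+ bit 2 marks a sign-extending load */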
+
+ /* resolve the label addresses */
+ *label_ptr[0] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[0] - 4);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ *label_ptr[1] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[1] - 4);
+ }
+
+ /* the 1st parameter (vaddr) has already been set in %eax */
+ arg_idx = 1;
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
+ addrhi_reg);
+ }
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
+ mem_index);
+ /* the return address should point into the qemu_ld code, hence raddr - 1 */
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+ /* 4 word parameters */
+ tcg_out_pushi(s, (int)(raddr - 1));
+ } else {
+ /* 3 word parameters */
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, (int)(raddr - 1));
+ }
+ tcg_out_calli(s, (tcg_target_long)qemu_ldext_helpers[s_bits]);
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+ /* Pop and discard. This is 2 bytes smaller than the add. */
+ tcg_out_pop(s, TCG_REG_ECX);
+ }
+
+ switch(opc) {
+ case 0 | 4:
+ tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
+ break;
+ case 1 | 4:
+ tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+ break;
+ case 0:
+ tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
+ break;
+ case 1:
+ tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
+ break;
+ case 2:
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case 2 | 4:
+ tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
+ break;
+#endif
+ case 3:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
+ } else if (data_reg == TCG_REG_EDX) {
+ /* xchg %edx, %eax */
+ tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+
+ /* jump back to original code */
+ tcg_out_jmp(s, (tcg_target_long) raddr);
+}
+
+/* generates the slow case of qemu_st at the end of the TB */
+static void tcg_out_qemu_st_helper_call(TCGContext *s, HelperLabel *label)
+{
+ int s_bits;
+ int stack_adjust;
+ int opc = label->opc_ext & HL_OPC_MASK;
+ int mem_index = label->mem_index;
+ int data_reg = label->datalo_reg;
+ int data_reg2 = label->datahi_reg;
+ int addrhi_reg = label->addrhi_reg;
+ uint8_t *raddr = label->raddr;
+ uint32_t **label_ptr = &label->label_ptr[0];
+
+ s_bits = opc & 3;
+
+ /* resolve the label addresses */
+ *label_ptr[0] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[0] - 4);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ *label_ptr[1] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[1] - 4);
+ }
+
+ /* the 1st parameter (vaddr) has already been set */
+ /* the return address should point into the qemu_st code, hence raddr - 1 */
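+ /* on i386 the helpers are REGPARM(3): the first three word-sized
+ arguments go in %eax, %edx and %ecx, everything else on the stack */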
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ TCG_REG_RSI, data_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
+ /* stack growth: one 64-bit word */
+ tcg_out_pushi(s, (int)(raddr - 1));
+ stack_adjust = 8;
+ } else if (TARGET_LONG_BITS == 32) {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
+ if (opc == 3) {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
+ tcg_out_pushi(s, (int)(raddr - 1));
+ tcg_out_pushi(s, mem_index);
+ stack_adjust = 8;
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
+ tcg_out_pushi(s, (int)(raddr - 1));
+ stack_adjust = 4;
+ }
+ } else {
+ if (opc == 3) {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, addrhi_reg);
+ tcg_out_pushi(s, (int)(raddr - 1));
+ tcg_out_pushi(s, mem_index);
+ tcg_out_push(s, data_reg2);
+ tcg_out_push(s, data_reg);
+ stack_adjust = 16;
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, addrhi_reg);
+ switch(opc) {
+ case 0:
+ tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
+ break;
+ case 1:
+ tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
+ break;
+ case 2:
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
+ break;
+ }
+ tcg_out_pushi(s, (int)(raddr - 1));
+ tcg_out_pushi(s, mem_index);
+ stack_adjust = 8;
+ }
+ }
+
+ tcg_out_calli(s, (tcg_target_long)qemu_stext_helpers[s_bits]);
+
+ if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
+ /* Pop and discard. This is 2 bytes smaller than the add. */
+ tcg_out_pop(s, TCG_REG_ECX);
+ } else if (stack_adjust != 0) {
+ tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
+ }
+
+ /* jump back to original code */
+ tcg_out_jmp(s, (tcg_target_long) raddr);
+}
+
+/* generates all of the slow cases of qemu_ld/st at the end of the TB */
+void tcg_out_qemu_ldst_helper_calls(TCGContext *s)
+{
+ int i;
+ HelperLabel *label;
+
+ for (i = 0; i < s->nb_helper_labels; i++) {
+ label = (HelperLabel *)&s->helper_labels[i];
+ if (IS_QEMU_LD_LABEL(label)) {
+ tcg_out_qemu_ld_helper_call(s, label);
+ } else {
+ tcg_out_qemu_st_helper_call(s, label);
+ }
+ }
+}
+
+/*
+ * almost the same as tcg_out_tlb_load(), except that the forward
+ * jumps target the slow-path stubs emitted at the end of the TB
+ */
+
+static inline void tcg_out_tlb_load_opt(TCGContext *s, int addrlo_idx,
+ int mem_index, int s_bits,
+ const TCGArg *args,
+ uint32_t **label_ptr, int which)
+{
+ const int addrlo = args[addrlo_idx];
+ const int r0 = tcg_target_call_iarg_regs[0];
+ const int r1 = tcg_target_call_iarg_regs[1];
+ TCGType type = TCG_TYPE_I32;
+ int rexw = 0;
+
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
+ type = TCG_TYPE_I64;
+ rexw = P_REXW;
+ }
+
+ tcg_out_mov(s, type, r1, addrlo);
+ tcg_out_mov(s, type, r0, addrlo);
+
+ tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ tgen_arithi(s, ARITH_AND + rexw, r0,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+ tgen_arithi(s, ARITH_AND + rexw, r1,
+ (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
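+ /* r0 now holds the address masked to its page (keeping the low bits
+ used for the alignment check); r1 holds the byte offset of the
+ matching TLB entry within env->tlb_table */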
+
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
+ offsetof(CPUState, tlb_table[mem_index][0])
+ + which);
+
+ /* cmp 0(r1), r0 */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
+
+ tcg_out_mov(s, type, r0, addrlo);
+
+ /* jne label1; a short jump is not enough for a large TB */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ if (!label_ptr) {
+ tcg_abort();
+ }
+ label_ptr[0] = (uint32_t *)s->code_ptr;
+ s->code_ptr += 4;
+
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ /* cmp 4(r1), addrhi */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
+
+ /* jne label1 */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ label_ptr[1] = (uint32_t *)s->code_ptr;
+ s->code_ptr += 4;
+ }
+
+ /* TLB Hit. */
+
+ /* add addend(r1), r0 */
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
+ offsetof(CPUTLBEntry, addend) - which);
+}
+
+/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
+ EAX. It will be useful once fixed registers globals are less
+ common. */
+static void tcg_out_qemu_ld_opt(TCGContext *s, const TCGArg *args,
+ int opc)
+{
+ int data_reg, data_reg2 = 0;
+ int addrlo_idx;
+ int mem_index, s_bits;
+ uint32_t *label_ptr[2];
+
+ data_reg = args[0];
+ addrlo_idx = 1;
+ if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+ data_reg2 = args[1];
+ addrlo_idx = 2;
+ }
+
+ mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+ s_bits = opc & 3;
+
+ tcg_out_tlb_load_opt(s, addrlo_idx, mem_index, s_bits, args,
+ &label_ptr[0], offsetof(CPUTLBEntry, addr_read));
+
+ /* TLB Hit. */
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
+ tcg_target_call_iarg_regs[0], 0, opc);
+
+ /* the helper stub jumps back to this point */
+ add_helper_label(s, opc, data_reg, data_reg2,
+ args[addrlo_idx],
+ (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) ? args[addrlo_idx + 1] : 0,
+ mem_index, s->code_ptr, label_ptr);
+}
+
+static void tcg_out_qemu_st_opt(TCGContext *s, const TCGArg *args,
+ int opc)
+{
+ int data_reg, data_reg2 = 0;
+ int addrlo_idx;
+ int mem_index, s_bits;
+ uint32_t *label_ptr[2];
+
+ data_reg = args[0];
+ addrlo_idx = 1;
+ if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+ data_reg2 = args[1];
+ addrlo_idx = 2;
+ }
+
+ mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+ s_bits = opc;
+
+ tcg_out_tlb_load_opt(s, addrlo_idx, mem_index, s_bits, args,
+ &label_ptr[0], offsetof(CPUTLBEntry, addr_write));
+
+ /* TLB Hit. */
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2,
+ tcg_target_call_iarg_regs[0], 0, opc);
+
+ /* the helper stub jumps back to this point */
+ add_helper_label(s, opc | HL_ST_MASK, data_reg, data_reg2,
+ args[addrlo_idx],
+ (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) ? args[addrlo_idx + 1] : 0,
+ mem_index, s->code_ptr, label_ptr);
+}
+
+#endif /* CONFIG_TCG_TARGET_X86_OPT */
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
tcg_out_ext16u(s, args[0], args[1]);
break;
+#if defined(CONFIG_TCG_TARGET_X86_OPT) && defined(CONFIG_SOFTMMU)
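+/* route the qemu_ld cases below to the variant that defers slow paths */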
+#define tcg_out_qemu_ld(S, ARGS, OPC) tcg_out_qemu_ld_opt(S, ARGS, OPC)
+#endif /* CONFIG_TCG_TARGET_X86_OPT */
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
break;
tcg_out_qemu_ld(s, args, 3);
break;
+#if defined(CONFIG_TCG_TARGET_X86_OPT) && defined(CONFIG_SOFTMMU)
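+/* likewise for the qemu_st cases below */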
+#define tcg_out_qemu_st(S, ARGS, OPC) tcg_out_qemu_st_opt(S, ARGS, OPC)
+#endif /* CONFIG_TCG_TARGET_X86_OPT */
case INDEX_op_qemu_st8:
tcg_out_qemu_st(s, args, 0);
break;
{ INDEX_op_qemu_st32, { "L", "L", "L" } },
{ INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
+#ifdef CONFIG_EXEC_PROFILE
+ { INDEX_op_prof_tbexec, { } },
+#endif
{ -1 },
};
tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
#else
TCGv_i32 t0 = tcg_temp_new_i32();
-
+
tcg_gen_ext8u_i32(t0, arg);
tcg_gen_shli_i32(t0, t0, 8);
tcg_gen_shri_i32(ret, arg, 8);
TCGv_i32 t0, t1;
t0 = tcg_temp_new_i32();
t1 = tcg_temp_new_i32();
-
+
tcg_gen_shli_i32(t0, arg, 24);
-
+
tcg_gen_andi_i32(t1, arg, 0x0000ff00);
tcg_gen_shli_i32(t1, t1, 8);
tcg_gen_or_i32(t0, t0, t1);
-
+
tcg_gen_shri_i32(t1, arg, 8);
tcg_gen_andi_i32(t1, t1, 0x0000ff00);
tcg_gen_or_i32(t0, t0, t1);
-
+
tcg_gen_shri_i32(t1, arg, 24);
tcg_gen_or_i32(ret, t0, t1);
tcg_temp_free_i32(t0);
#else
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
-
+
tcg_gen_shli_i64(t0, arg, 56);
-
+
tcg_gen_andi_i64(t1, arg, 0x0000ff00);
tcg_gen_shli_i64(t1, t1, 40);
tcg_gen_or_i64(t0, t0, t1);
-
+
tcg_gen_andi_i64(t1, arg, 0x00ff0000);
tcg_gen_shli_i64(t1, t1, 24);
tcg_gen_or_i64(t0, t0, t1);
tcg_gen_shri_i64(t1, arg, 8);
tcg_gen_andi_i64(t1, t1, 0xff000000);
tcg_gen_or_i64(t0, t0, t1);
-
+
tcg_gen_shri_i64(t1, arg, 24);
tcg_gen_andi_i64(t1, t1, 0x00ff0000);
tcg_gen_or_i64(t0, t0, t1);
{
/* XXX: must really use a 32 bit size for TCGArg in all cases */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
- tcg_gen_op2ii(INDEX_op_debug_insn_start,
+ tcg_gen_op2ii(INDEX_op_debug_insn_start,
(uint32_t)(pc), (uint32_t)(pc >> 32));
#else
tcg_gen_op1i(INDEX_op_debug_insn_start, pc);
#endif
}
+#ifdef CONFIG_EXEC_PROFILE
+static inline void tcg_gen_prof_tbexec(int idx)
+{
+ tcg_gen_op1i(INDEX_op_prof_tbexec, idx);
+}
+#else
+# define tcg_gen_prof_tbexec(idx)
+#endif /* CONFIG_EXEC_PROFILE */
+
static inline void tcg_gen_exit_tb(tcg_target_long val)
{
tcg_gen_op1i(INDEX_op_exit_tb, val);
#endif /* TCG_TARGET_REG_BITS != 32 */
+#ifdef CONFIG_EXEC_PROFILE
+DEF(prof_tbexec, 0, 1, 0, 0)
+#endif /* CONFIG_EXEC_PROFILE */
+
#undef DEF
/* define it to use liveness analysis (better code) */
#define USE_LIVENESS_ANALYSIS
-#define USE_TCG_OPTIMIZATIONS
+#define USE_TCG_OPTIMIZATIONS
#include "config.h"
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(uint8_t *code_ptr, int type,
tcg_target_long value, tcg_target_long addend);
static TCGOpDef tcg_op_defs[] = {
l = &s->labels[label_index];
if (l->has_value) {
/* FIXME: This may break relocations on RISC targets that
- modify instruction fields in place. The caller may not have
+ modify instruction fields in place. The caller may not have
written the initial value. */
patch_reloc(code_ptr, type, l->u.value, addend);
} else {
}
}
-static void tcg_out_label(TCGContext *s, int label_index,
+static void tcg_out_label(TCGContext *s, int label_index,
tcg_target_long value)
{
TCGLabel *l;
{
TCGPool *p;
int pool_size;
-
+
if (size > TCG_POOL_CHUNK_SIZE) {
/* big malloc: insert a new pool (XXX: could optimize) */
p = qemu_malloc(sizeof(TCGPool) + size);
p = qemu_malloc(sizeof(TCGPool) + pool_size);
p->size = pool_size;
p->next = NULL;
- if (s->pool_current)
+ if (s->pool_current)
s->pool_current->next = p;
else
s->pool_first = p;
memset(s, 0, sizeof(*s));
s->temps = s->static_temps;
s->nb_globals = 0;
-
+
/* Count total number of arguments and allocate the corresponding
space */
total_args = 0;
sorted_args += n;
args_ct += n;
}
-
+
tcg_target_init(s);
}
s->code_buf = code_gen_prologue;
s->code_ptr = s->code_buf;
tcg_target_qemu_prologue(s);
- flush_icache_range((unsigned long)s->code_buf,
+ flush_icache_range((unsigned long)s->code_buf,
(unsigned long)s->code_ptr);
}
gen_opc_ptr = gen_opc_buf;
gen_opparam_ptr = gen_opparam_buf;
+#if defined(CONFIG_TCG_TARGET_X86_OPT) && defined(CONFIG_SOFTMMU)
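+ /* allocated from the per-TB pool, so the array is recycled at the
+ next tcg_func_start() */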
+ s->helper_labels = tcg_malloc(sizeof(HelperLabel) * TCG_MAX_HELPER_LABELS);
+ if (!s->helper_labels) {
+ tcg_abort();
+ }
+ s->nb_helper_labels = 0;
+#endif
}
static inline void tcg_temp_alloc(TCGContext *s, int n)
if (idx < s->nb_globals) {
pstrcpy(buf, buf_size, ts->name);
} else {
- if (ts->temp_local)
+ if (ts->temp_local)
snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
else
snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
tcg_target_ulong v;
if (unlikely(!s->helpers_sorted)) {
- qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo),
+ qsort(s->helpers, s->nb_helpers, sizeof(TCGHelperInfo),
helper_cmp);
s->helpers_sorted = 1;
}
#else
pc = args[0];
#endif
- if (!first_insn)
+ if (!first_insn)
fprintf(outfile, "\n");
fprintf(outfile, " ---- 0x%" PRIx64, pc);
first_insn = 0;
tcg_get_arg_str_idx(s, buf, sizeof(buf), args[nb_oargs + i]));
}
}
- } else if (c == INDEX_op_movi_i32
+ } else if (c == INDEX_op_movi_i32
#if TCG_TARGET_REG_BITS == 64
|| c == INDEX_op_movi_i64
#endif
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
nb_cargs = def->nb_cargs;
- fprintf(outfile, " %s %s,$", def->name,
+ fprintf(outfile, " %s %s,$", def->name,
tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0]));
val = args[1];
th = tcg_find_helper(s, val);
nb_iargs = def->nb_iargs;
nb_cargs = def->nb_cargs;
}
-
+
k = 0;
for(i = 0; i < nb_oargs; i++) {
if (k != 0)
#ifdef USE_LIVENESS_ANALYSIS
/* set a nop for an operation using 'nb_args' */
-static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
+static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
TCGArg *args, int nb_args)
{
if (nb_args == 0) {
const TCGOpDef *def;
uint8_t *dead_temps;
unsigned int dead_iargs;
-
+
gen_opc_ptr++; /* skip end */
nb_ops = gen_opc_ptr - gen_opc_buf;
s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t));
-
+
dead_temps = tcg_malloc(s->nb_temps);
memset(dead_temps, 1, s->nb_temps);
if (!dead_temps[arg])
goto do_not_remove_call;
}
- tcg_set_nop(s, gen_opc_buf + op_index,
+ tcg_set_nop(s, gen_opc_buf + op_index,
args - 1, nb_args);
} else {
do_not_remove_call:
arg = args[i];
dead_temps[arg] = 1;
}
-
+
if (!(call_flags & TCG_CALL_CONST)) {
/* globals are live (they may be used by the call) */
memset(dead_temps, 0, s->nb_globals);
for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
if (s->reg_to_temp[i] >= 0) {
- printf("%s: %s\n",
- tcg_target_reg_names[i],
+ printf("%s: %s\n",
+ tcg_target_reg_names[i],
tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
}
}
ts = &s->temps[k];
if (ts->val_type != TEMP_VAL_REG ||
ts->reg != reg) {
- printf("Inconsistency for register %s:\n",
+ printf("Inconsistency for register %s:\n",
tcg_target_reg_names[reg]);
goto fail;
}
if (ts->val_type == TEMP_VAL_REG &&
!ts->fixed_reg &&
s->reg_to_temp[ts->reg] != k) {
- printf("Inconsistency for temp %s:\n",
+ printf("Inconsistency for temp %s:\n",
tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
fail:
printf("reg state:\n");
ts = &s->temps[temp];
assert(ts->val_type == TEMP_VAL_REG);
if (!ts->mem_coherent) {
- if (!ts->mem_allocated)
+ if (!ts->mem_allocated)
temp_allocate_frame(s, temp);
tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
}
ts->val_type = TEMP_VAL_MEM;
break;
case TEMP_VAL_CONST:
- reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
allocated_regs);
- if (!ts->mem_allocated)
+ if (!ts->mem_allocated)
temp_allocate_frame(s, temp);
tcg_out_movi(s, ts->type, reg, ts->val);
tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
ots->mem_coherent = 0;
}
-static void tcg_reg_alloc_op(TCGContext *s,
+static void tcg_reg_alloc_op(TCGContext *s,
const TCGOpDef *def, TCGOpcode opc,
const TCGArg *args,
unsigned int dead_iargs)
nb_iargs = def->nb_iargs;
/* copy constants */
- memcpy(new_args + nb_oargs + nb_iargs,
- args + nb_oargs + nb_iargs,
+ memcpy(new_args + nb_oargs + nb_iargs,
+ args + nb_oargs + nb_iargs,
sizeof(TCGArg) * def->nb_cargs);
- /* satisfy input constraints */
+ /* satisfy input constraints */
tcg_regset_set(allocated_regs, s->reserved_regs);
for(k = 0; k < nb_iargs; k++) {
i = def->sorted_args[nb_oargs + k];
/* if the input is aliased to an output and if it is
not dead after the instruction, we must allocate
a new register and move it */
- if (!IS_DEAD_IARG(i - nb_oargs))
+ if (!IS_DEAD_IARG(i - nb_oargs))
goto allocate_in_reg;
}
}
/* nothing to do : the constraint is satisfied */
} else {
allocate_in_reg:
- /* allocate a new register matching the constraint
+ /* allocate a new register matching the constraint
and move the temporary register into it */
reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
tcg_out_mov(s, ts->type, reg, ts->reg);
tcg_regset_set_reg(allocated_regs, reg);
iarg_end: ;
}
-
+
if (def->flags & TCG_OPF_BB_END) {
tcg_reg_alloc_bb_end(s, allocated_regs);
} else {
}
}
}
-
+
if (def->flags & TCG_OPF_CALL_CLOBBER) {
- /* XXX: permit generic clobber register list ? */
+ /* XXX: permit generic clobber register list ? */
for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
tcg_reg_free(s, reg);
}
/* XXX: for load/store we could do that only for the slow path
(i.e. when a memory callback is called) */
-
+
/* store globals and free associated registers (we assume the insn
can modify any global. */
save_globals(s, allocated_regs);
}
-
+
/* satisfy the output constraints */
tcg_regset_set(allocated_regs, s->reserved_regs);
for(k = 0; k < nb_oargs; k++) {
ts->reg = reg;
/* temp value is modified, so the value kept in memory is
potentially not the same */
- ts->mem_coherent = 0;
+ ts->mem_coherent = 0;
s->reg_to_temp[reg] = arg;
}
oarg_end:
/* emit instruction */
tcg_out_op(s, opc, new_args, const_args);
-
+
/* move the outputs in the correct register if needed */
for(i = 0; i < nb_oargs; i++) {
ts = &s->temps[args[i]];
/* assign stack slots first */
/* XXX: preallocate call stack */
call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
- call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
+ call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
~(TCG_TARGET_STACK_ALIGN - 1);
allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
if (allocate_args) {
if (ts->val_type == TEMP_VAL_REG) {
tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
} else if (ts->val_type == TEMP_VAL_MEM) {
- reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
s->reserved_regs);
/* XXX: not correct if reading values from the stack */
tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
} else if (ts->val_type == TEMP_VAL_CONST) {
- reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
s->reserved_regs);
/* XXX: sign extend may be needed on some targets */
tcg_out_movi(s, ts->type, reg, ts->val);
stack_offset += sizeof(tcg_target_long);
#endif
}
-
+
/* assign input registers */
tcg_regset_set(allocated_regs, s->reserved_regs);
for(i = 0; i < nb_regs; i++) {
tcg_regset_set_reg(allocated_regs, reg);
}
}
-
+
/* assign function address */
func_arg = args[nb_oargs + nb_iargs - 1];
arg_ct = &def->args_ct[0];
} else {
tcg_abort();
}
-
-
+
+
/* mark dead temporaries and free the associated registers */
for(i = 0; i < nb_iargs; i++) {
arg = args[nb_oargs + i];
}
}
}
-
+
/* clobber call registers */
for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
tcg_reg_free(s, reg);
}
}
-
+
/* store globals and free associated registers (we assume the call
can modify any global. */
if (!(flags & TCG_CALL_CONST)) {
}
tcg_out_op(s, opc, &func_arg, &const_func_arg);
-
+
if (allocate_args) {
tcg_out_addi(s, TCG_REG_CALL_STACK, STACK_DIR(call_stack_size));
}
s->reg_to_temp[ts->reg] = -1;
ts->val_type = TEMP_VAL_REG;
ts->reg = reg;
- ts->mem_coherent = 0;
+ ts->mem_coherent = 0;
s->reg_to_temp[reg] = arg;
}
}
-
+
return nb_iargs + nb_oargs + def->nb_cargs + 1;
}
goto next;
case INDEX_op_end:
goto the_end;
+#ifdef CONFIG_PROFILER_EX
+ case INDEX_op_qemu_ld8u:
+ case INDEX_op_qemu_ld8s:
+ case INDEX_op_qemu_ld16u:
+ case INDEX_op_qemu_ld16s:
+ case INDEX_op_qemu_ld32:
+ case INDEX_op_qemu_ld64:
+ s->qemu_ld_count++;
+ goto gen;
+ case INDEX_op_qemu_st8:
+ case INDEX_op_qemu_st16:
+ case INDEX_op_qemu_st32:
+ case INDEX_op_qemu_st64:
+ s->qemu_st_count++;
+ gen:
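+ /* fall through to the generic code generation path */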
+#endif
default:
/* Note: in order to speed up the code, it would be much
faster to have specialized register allocator functions for
#endif
}
the_end:
+#if defined(CONFIG_TCG_TARGET_X86_OPT) && defined(CONFIG_SOFTMMU)
+ /* Generate the MMU helper-call stubs at the end of the block (currently only for qemu_ld/st) */
+ tcg_out_qemu_ldst_helper_calls(s);
+#endif
return -1;
}
tcg_gen_code_common(s, gen_code_buf, -1);
/* flush instruction cache */
- flush_icache_range((unsigned long)gen_code_buf,
+ flush_icache_range((unsigned long)gen_code_buf,
(unsigned long)s->code_ptr);
return s->code_ptr - gen_code_buf;
}
tot = s->interm_time + s->code_time;
cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
tot, tot / 2.4e9);
- cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
- s->tb_count,
+ cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
+ s->tb_count,
s->tb_count1 - s->tb_count,
s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
- cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
+ cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
cpu_fprintf(f, "deleted ops/TB %0.2f\n",
- s->tb_count ?
+ s->tb_count ?
(double)s->del_op_count / s->tb_count : 0);
cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
- s->tb_count ?
+ s->tb_count ?
(double)s->temp_count / s->tb_count : 0,
s->temp_count_max);
-
- cpu_fprintf(f, "cycles/op %0.1f\n",
+
+ cpu_fprintf(f, "cycles/op %0.1f\n",
s->op_count ? (double)tot / s->op_count : 0);
- cpu_fprintf(f, "cycles/in byte %0.1f\n",
+ cpu_fprintf(f, "cycles/in byte %0.1f\n",
s->code_in_len ? (double)tot / s->code_in_len : 0);
- cpu_fprintf(f, "cycles/out byte %0.1f\n",
+ cpu_fprintf(f, "cycles/out byte %0.1f\n",
s->code_out_len ? (double)tot / s->code_out_len : 0);
if (tot == 0)
tot = 1;
- cpu_fprintf(f, " gen_interm time %0.1f%%\n",
+ cpu_fprintf(f, " gen_interm time %0.1f%%\n",
(double)s->interm_time / tot * 100.0);
- cpu_fprintf(f, " gen_code time %0.1f%%\n",
+ cpu_fprintf(f, " gen_code time %0.1f%%\n",
(double)s->code_time / tot * 100.0);
- cpu_fprintf(f, "liveness/code time %0.1f%%\n",
+ cpu_fprintf(f, "liveness/code time %0.1f%%\n",
(double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
s->restore_count);
int type;
uint8_t *ptr;
tcg_target_long addend;
-} TCGRelocation;
+} TCGRelocation;
typedef struct TCGLabel {
int has_value;
are aliases for target_ulong and host pointer sized values respectively.
*/
+#if defined(CONFIG_TCG_TARGET_X86_OPT) && defined(CONFIG_SOFTMMU)
+#define TCG_MAX_HELPER_LABELS 200
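+/* upper bound on qemu_ld/st ops in one TB; add_helper_label() aborts if exceeded */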
+#define HL_LDST_SHIFT 4
+#define HL_LDST_MASK (1 << HL_LDST_SHIFT)
+#define HL_ST_MASK HL_LDST_MASK
+#define HL_OPC_MASK (HL_LDST_MASK - 1)
+#define IS_QEMU_LD_LABEL(L) (!((L)->opc_ext & HL_LDST_MASK))
+#define IS_QEMU_ST_LABEL(L) ((L)->opc_ext & HL_LDST_MASK)
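+/* opc_ext layout: bits [3:0] hold the qemu_ld/st opcode, bit 4 the store flag */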
+
+typedef struct HelperLabel {
+ int opc_ext;
+ int datalo_reg;
+ int datahi_reg;
+ int addrlo_reg;
+ int addrhi_reg;
+ int mem_index;
+ uint8_t *raddr; /* return address */
+ uint32_t *label_ptr[2]; /* label pointers to be patched */
+} HelperLabel;
+#endif /* CONFIG_TCG_TARGET_X86_OPT */
+
#ifdef CONFIG_DEBUG_TCG
#define DEBUG_TCGV 1
#endif
/* A pure function only reads its arguments and TCG global variables
and cannot raise exceptions. Hence a call to a pure function can be
safely suppressed if the return value is not used. */
-#define TCG_CALL_PURE 0x0010
+#define TCG_CALL_PURE 0x0010
/* A const function only reads its arguments and does not use TCG
global variables. Hence a call to such a function does not
save TCG global variables back to their canonical location. */
int nb_globals;
int nb_temps;
/* index of free temps, -1 if none */
- int first_free_temp[TCG_TYPE_COUNT * 2];
+ int first_free_temp[TCG_TYPE_COUNT * 2];
/* goto_tb support */
uint8_t *code_buf;
/* liveness analysis */
uint16_t *op_dead_iargs; /* for each operation, each bit tells if the
corresponding input argument is dead */
-
+
/* tells in which temporary a given register is. It does not take
into account fixed registers */
int reg_to_temp[TCG_TARGET_NB_REGS];
int64_t la_time;
int64_t restore_count;
int64_t restore_time;
+#ifdef CONFIG_PROFILER_EX
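+ /* number of qemu_ld/st ops translated (counted in tcg_gen_code_common) */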
+ int64_t qemu_ld_count;
+ int64_t qemu_st_count;
+#endif
+#endif
+#if defined(CONFIG_TCG_TARGET_X86_OPT) && defined(CONFIG_SOFTMMU)
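+ /* pending slow-path stubs for the TB being translated */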
+ HelperLabel *helper_labels;
+ int nb_helper_labels;
#endif
};
#define TCG_OPF_BB_END 0x01 /* instruction defines the end of a basic
block */
-#define TCG_OPF_CALL_CLOBBER 0x02 /* instruction clobbers call registers
+#define TCG_OPF_CALL_CLOBBER 0x02 /* instruction clobbers call registers
and potentially update globals. */
#define TCG_OPF_SIDE_EFFECTS 0x04 /* instruction has side effects : it
cannot be removed if its output
int used;
#endif
} TCGOpDef;
-
+
typedef struct TCGTargetOpDef {
TCGOpcode op;
const char *args_ct_str[TCG_MAX_OP_ARGS];
#else
#define tcg_qemu_tb_exec(tb_ptr) ((long REGPARM (*)(void *))code_gen_prologue)(tb_ptr)
#endif
+
+#if defined(CONFIG_TCG_TARGET_X86_OPT)
+void tcg_out_qemu_ldst_helper_calls(TCGContext *s);
+#endif