From: Yeongkyoon Lee Date: Sat, 30 Jun 2012 05:22:39 +0000 (+0900) Subject: [Title] Patch qemu_ld/st optimization according to qemu 1.1 X-Git-Tag: TizenStudio_2.0_p2.3~1273^2~92 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=115f0b99998fbe3e5c7dd41787b0db7a5ef1ed31;p=sdk%2Femulator%2Fqemu.git [Title] Patch qemu_ld/st optimization according to qemu 1.1 [Type] bugfix [Module] TCG [Priority] Medium [Jira#] [Redmine#] [Problem] Emulator doesn't work [Cause] qemu 1.0 based code [Solution] Upgrade to qemu 1.1 [TestCase] Not tested (no platform image) --- diff --git a/exec-all.h b/exec-all.h index 6c222c0..89a19f0 100644 --- a/exec-all.h +++ b/exec-all.h @@ -88,7 +88,7 @@ int cpu_restore_state(struct TranslationBlock *tb, CPUArchState *env, uintptr_t searched_pc); void QEMU_NORETURN cpu_resume_from_signal(CPUArchState *env1, void *puc); void QEMU_NORETURN cpu_io_recompile(CPUArchState *env, uintptr_t retaddr); -TranslationBlock *tb_gen_code(CPUArchState *env, +TranslationBlock *tb_gen_code(CPUArchState *env, target_ulong pc, target_ulong cs_base, int flags, int cflags); void cpu_exec_init(CPUArchState *env); @@ -353,21 +353,6 @@ static inline tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong #else /* cputlb.c */ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr); -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -/* Extended versions of MMU helpers for qemu_ld/st optimization. - They get return address arguments because the caller PCs are not where helpers return to. */ -#if defined(__i386__) || defined(__x86_64__) -uint8_t __ldextb_mmu(target_ulong addr, int mmu_idx, uintptr_t retaddr); -void __stextb_mmu(target_ulong addr, uint8_t val, int mmu_idx, uintptr_t retaddr); -uint16_t __ldextw_mmu(target_ulong addr, int mmu_idx, uintptr_t retaddr); -void __stextw_mmu(target_ulong addr, uint16_t val, int mmu_idx, uintptr_t retaddr); -uint32_t __ldextl_mmu(target_ulong addr, int mmu_idx, uintptr_t retaddr); -void __stextl_mmu(target_ulong addr, uint32_t val, int mmu_idx, uintptr_t retaddr); -uint64_t __ldextq_mmu(target_ulong addr, int mmu_idx, uintptr_t retaddr); -void __stextq_mmu(target_ulong addr, uint64_t val, int mmu_idx, uintptr_t retaddr); -#endif -#endif /* CONFIG_QEMU_LDST_OPTIMIZATION */ - #endif typedef void (CPUDebugExcpHandler)(CPUArchState *env); diff --git a/softmmu_defs.h b/softmmu_defs.h index 8d59f9d..e3f6156 100644 --- a/softmmu_defs.h +++ b/softmmu_defs.h @@ -19,6 +19,19 @@ void __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx); uint64_t __ldq_mmu(target_ulong addr, int mmu_idx); void __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx); +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) +/* Extended versions of MMU helpers for qemu_ld/st optimization. + They get return address arguments because the caller PCs are not where helpers return to. 
*/ +uint8_t __ext_ldb_mmu(target_ulong addr, int mmu_idx, uintptr_t ra); +void __ext_stb_mmu(target_ulong addr, uint8_t val, int mmu_idx, uintptr_t ra); +uint16_t __ext_ldw_mmu(target_ulong addr, int mmu_idx, uintptr_t ra); +void __ext_stw_mmu(target_ulong addr, uint16_t val, int mmu_idx, uintptr_t ra); +uint32_t __ext_ldl_mmu(target_ulong addr, int mmu_idx, uintptr_t ra); +void __ext_stl_mmu(target_ulong addr, uint32_t val, int mmu_idx, uintptr_t ra); +uint64_t __ext_ldq_mmu(target_ulong addr, int mmu_idx, uintptr_t ra); +void __ext_stq_mmu(target_ulong addr, uint64_t val, int mmu_idx, uintptr_t ra); +#endif /* CONFIG_QEMU_LDST_OPTIMIZATION && CONFIG_SOFTMMU */ + uint8_t __ldb_cmmu(target_ulong addr, int mmu_idx); void __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx); uint16_t __ldw_cmmu(target_ulong addr, int mmu_idx); diff --git a/softmmu_template.h b/softmmu_template.h index 0b403ce..e6816c1 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -66,6 +66,27 @@ #define HELPER_PREFIX helper_ #endif +#ifndef CONFIG_TCG_PASS_AREG0 +#ifdef USE_EXTENDED_HELPER +/* Exteneded helper funtions have one more argument of address + to which pc is returned after setting TLB entry */ +#ifndef CONFIG_QEMU_LDST_OPTIMIZATION +#error You need CONFIG_QEMU_LDST_OPTIMIZATION! +#endif +#undef HELPER_PREFIX +#define HELPER_PREFIX __ext_ +#define RET_PARAM , uintptr_t raddr +#define RET_VAR raddr +#define GET_RET_ADDR() RET_VAR +#else +#define RET_PARAM +#define RET_VAR +#define GET_RET_ADDR() GETPC() +#endif /* USE_EXTENDED_HELPER */ +#endif /* !CONFIG_TCG_PASS_AREG0 */ + + +#ifndef USE_EXTENDED_HELPER static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr, int mmu_idx, @@ -101,12 +122,14 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM #endif /* SHIFT > 2 */ return res; } +#endif /* !USE_EXTENDED_HELPER */ /* handle all cases except unaligned access which span two pages */ DATA_TYPE glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr, - int mmu_idx) + int mmu_idx + RET_PARAM) { DATA_TYPE res; int index; @@ -124,13 +147,13 @@ glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM /* IO access */ if ((addr & (DATA_SIZE - 1)) != 0) goto do_unaligned_access; - retaddr = GETPC(); + retaddr = GET_RET_ADDR(); ioaddr = env->iotlb[mmu_idx][index]; res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr); } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) { /* slow unaligned access (it spans two pages or IO) */ do_unaligned_access: - retaddr = GETPC(); + retaddr = GET_RET_ADDR(); #ifdef ALIGNED_ONLY do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr); #endif @@ -141,7 +164,7 @@ glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM uintptr_t addend; #ifdef ALIGNED_ONLY if ((addr & (DATA_SIZE - 1)) != 0) { - retaddr = GETPC(); + retaddr = GET_RET_ADDR(); do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr); } #endif @@ -151,7 +174,7 @@ glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM } } else { /* the page is not in the TLB : fill it */ - retaddr = GETPC(); + retaddr = GET_RET_ADDR(); #ifdef ALIGNED_ONLY if ((addr & (DATA_SIZE - 1)) != 0) do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr); @@ -162,6 +185,7 @@ glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM return res; } +#ifndef USE_EXTENDED_HELPER /* handle all unaligned cases */ static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM @@ 
-213,69 +237,11 @@ glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM } return res; } - -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && !defined(SOFTMMU_CODE_ACCESS) -/* Extended versions of MMU helpers for qemu_ld IR optimization. - They get return address arguments because the caller PCs are not where helpers return to. - !defined(SOFTMMU_CODE_ACCESS) suppress warnings from exec.c */ -#if defined(__i386__) || defined(__x86_64__) -DATA_TYPE glue(glue(__ldext, SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr, - int mmu_idx, - uintptr_t retaddr) -{ - DATA_TYPE res; - int index; - target_ulong tlb_addr; - target_phys_addr_t ioaddr; - - /* test if there is match for unaligned or IO access */ - /* XXX: could done more in memory macro in a non portable way */ - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - redo: - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; - if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { - if (tlb_addr & ~TARGET_PAGE_MASK) { - /* IO access */ - if ((addr & (DATA_SIZE - 1)) != 0) - goto do_unaligned_access; - ioaddr = env->iotlb[mmu_idx][index]; - res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr); - } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) { - /* slow unaligned access (it spans two pages or IO) */ - do_unaligned_access: -#ifdef ALIGNED_ONLY - do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr); -#endif - res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr, - mmu_idx, retaddr); - } else { - /* unaligned/aligned access in the same page */ - uintptr_t addend; -#ifdef ALIGNED_ONLY - if ((addr & (DATA_SIZE - 1)) != 0) { - do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr); - } -#endif - addend = env->tlb_table[mmu_idx][index].addend; - res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t) - (addr + addend)); - } - } else { - /* the page is not in the TLB : fill it */ -#ifdef ALIGNED_ONLY - if ((addr & (DATA_SIZE - 1)) != 0) - do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr); -#endif - tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); - goto redo; - } - return res; -} -#endif /* defined(__i386__) || defined(__x86_64__) */ -#endif /* defined(CONFIG_QEMU_LDST_OPTIMIZATION) */ +#endif /* !USE_EXTENDED_HELPER */ #ifndef SOFTMMU_CODE_ACCESS +#ifndef USE_EXTENDED_HELPER static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr, DATA_TYPE val, @@ -312,11 +278,13 @@ static inline void glue(io_write, SUFFIX)(ENV_PARAM #endif #endif /* SHIFT > 2 */ } +#endif /* !USE_EXTENDED_HELPER */ void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr, DATA_TYPE val, - int mmu_idx) + int mmu_idx + RET_PARAM) { target_phys_addr_t ioaddr; target_ulong tlb_addr; @@ -331,12 +299,12 @@ void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM /* IO access */ if ((addr & (DATA_SIZE - 1)) != 0) goto do_unaligned_access; - retaddr = GETPC(); + retaddr = GET_RET_ADDR(); ioaddr = env->iotlb[mmu_idx][index]; glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr); } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) { do_unaligned_access: - retaddr = GETPC(); + retaddr = GET_RET_ADDR(); #ifdef ALIGNED_ONLY do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr); #endif @@ -347,7 +315,7 @@ void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM uintptr_t addend; #ifdef ALIGNED_ONLY if ((addr & (DATA_SIZE - 1)) != 0) { - retaddr = GETPC(); 
+ retaddr = GET_RET_ADDR(); do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr); } #endif @@ -357,7 +325,7 @@ void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM } } else { /* the page is not in the TLB : fill it */ - retaddr = GETPC(); + retaddr = GET_RET_ADDR(); #ifdef ALIGNED_ONLY if ((addr & (DATA_SIZE - 1)) != 0) do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr); @@ -367,6 +335,7 @@ void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM } } +#ifndef USE_EXTENDED_HELPER /* handles all unaligned cases */ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr, @@ -416,61 +385,7 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM goto redo; } } - -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) -#if defined(__i386__) || defined(__x86_64__) -/* Extended versions of MMU helpers for qemu_st IR optimization. - They get return address arguments because the caller PCs are not where helpers return to. */ -void glue(glue(__stext, SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr, - DATA_TYPE val, - int mmu_idx, - uintptr_t retaddr) -{ - target_phys_addr_t ioaddr; - target_ulong tlb_addr; - int index; - - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - redo: - tlb_addr = env->tlb_table[mmu_idx][index].addr_write; - if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { - if (tlb_addr & ~TARGET_PAGE_MASK) { - /* IO access */ - if ((addr & (DATA_SIZE - 1)) != 0) - goto do_unaligned_access; - ioaddr = env->iotlb[mmu_idx][index]; - glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr); - } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) { - do_unaligned_access: -#ifdef ALIGNED_ONLY - do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr); -#endif - glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_VAR addr, val, - mmu_idx, retaddr); - } else { - /* aligned/unaligned access in the same page */ - uintptr_t addend; -#ifdef ALIGNED_ONLY - if ((addr & (DATA_SIZE - 1)) != 0) { - do_unaligned_access(addr, 1, mmu_idx, retaddr); - } -#endif - addend = env->tlb_table[mmu_idx][index].addend; - glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t) - (addr + addend), val); - } - } else { - /* the page is not in the TLB : fill it */ -#ifdef ALIGNED_ONLY - if ((addr & (DATA_SIZE - 1)) != 0) - do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr); -#endif - tlb_fill(env, addr, 1, mmu_idx, retaddr); - goto redo; - } -} -#endif /* defined(__i386__) || defined(__x86_64__) */ -#endif /* defined(CONFIG_QEMU_LDST_OPTIMIZATION) */ +#endif /* !USE_EXTENDED_HELPER */ #endif /* !defined(SOFTMMU_CODE_ACCESS) */ @@ -485,3 +400,6 @@ void glue(glue(__stext, SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr, #undef ENV_VAR #undef CPU_PREFIX #undef HELPER_PREFIX +#undef RET_PARAM +#undef RET_VAR +#undef GET_RET_ADDR diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index b53369d..87136df 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -71,6 +71,29 @@ uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def, #define SHIFT 3 #include "softmmu_template.h" + +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) +/* Exteneded MMU helper funtions for qemu_ld/st optimization + Note that normal helper functions should be defined above + to avoid duplication of common functions, slow_ld/st and io_read/write. 
+ */ +#define USE_EXTENDED_HELPER + +#define SHIFT 0 +#include "softmmu_template.h" + +#define SHIFT 1 +#include "softmmu_template.h" + +#define SHIFT 2 +#include "softmmu_template.h" + +#define SHIFT 3 +#include "softmmu_template.h" + +#undef USE_EXTENDED_HELPER +#endif /* CONFIG_QEMU_LDST_OPTIMIZATION && CONFIG_SOFTMMU */ + /* try to fill the TLB and return an exception if error. If retaddr is NULL, it means that the function was called in C code (i.e. not from generated code or from helper.c) */ diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c index bc3b94e..f20f56c 100644 --- a/target-i386/op_helper.c +++ b/target-i386/op_helper.c @@ -2045,7 +2045,7 @@ void helper_cmpxchg8b(target_ulong a0) eflags |= CC_Z; } else { /* always do the store */ - stq(a0, d); + stq(a0, d); EDX = (uint32_t)(d >> 32); EAX = (uint32_t)d; eflags &= ~CC_Z; @@ -2070,8 +2070,8 @@ void helper_cmpxchg16b(target_ulong a0) eflags |= CC_Z; } else { /* always do the store */ - stq(a0, d0); - stq(a0 + 8, d1); + stq(a0, d0); + stq(a0 + 8, d1); EDX = d1; EAX = d0; eflags &= ~CC_Z; @@ -2463,7 +2463,7 @@ void helper_lcall_real(int new_cs, target_ulong new_eip1, } /* protected mode call */ -void helper_lcall_protected(int new_cs, target_ulong new_eip, +void helper_lcall_protected(int new_cs, target_ulong new_eip, int shift, int next_eip_addend) { int new_stack, i; @@ -3144,7 +3144,7 @@ void helper_rdpmc(void) raise_exception(EXCP0D_GPF); } helper_svm_check_intercept_param(SVM_EXIT_RDPMC, 0); - + /* currently unimplemented */ raise_exception_err(EXCP06_ILLOP, 0); } @@ -4611,7 +4611,7 @@ void helper_fxsave(target_ulong ptr, int data64) if (data64) { stq(ptr + 0x08, 0); /* rip */ stq(ptr + 0x10, 0); /* rdp */ - } else + } else #endif { stl(ptr + 0x08, 0); /* eip */ @@ -4873,7 +4873,7 @@ void helper_hlt(int next_eip_addend) { helper_svm_check_intercept_param(SVM_EXIT_HLT, 0); EIP += next_eip_addend; - + do_hlt(); } @@ -4995,6 +4995,28 @@ void helper_boundl(target_ulong a0, int v) #define SHIFT 3 #include "softmmu_template.h" +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) +/* Exteneded MMU helper funtions for qemu_ld/st optimization + Note that normal helper functions should be defined above + to avoid duplication of common functions, slow_ld/st and io_read/write. 
+ */ +#define USE_EXTENDED_HELPER + +#define SHIFT 0 +#include "softmmu_template.h" + +#define SHIFT 1 +#include "softmmu_template.h" + +#define SHIFT 2 +#include "softmmu_template.h" + +#define SHIFT 3 +#include "softmmu_template.h" + +#undef USE_EXTENDED_HELPER +#endif /* CONFIG_QEMU_LDST_OPTIMIZATION && CONFIG_SOFTMMU */ + #endif #if !defined(CONFIG_USER_ONLY) @@ -5034,16 +5056,16 @@ void tlb_fill(CPUX86State *env1, target_ulong addr, int is_write, int mmu_idx, #if defined(CONFIG_USER_ONLY) void helper_vmrun(int aflag, int next_eip_addend) -{ +{ } -void helper_vmmcall(void) -{ +void helper_vmmcall(void) +{ } void helper_vmload(int aflag) -{ +{ } void helper_vmsave(int aflag) -{ +{ } void helper_stgi(void) { @@ -5051,14 +5073,14 @@ void helper_stgi(void) void helper_clgi(void) { } -void helper_skinit(void) -{ +void helper_skinit(void) +{ } void helper_invlpga(int aflag) -{ +{ } -void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) -{ +void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) +{ } void helper_svm_check_intercept_param(uint32_t type, uint64_t param) { @@ -5068,7 +5090,7 @@ void svm_check_intercept(CPUX86State *env1, uint32_t type) { } -void helper_svm_check_io(uint32_t port, uint32_t param, +void helper_svm_check_io(uint32_t port, uint32_t param, uint32_t next_eip_addend) { } @@ -5077,16 +5099,16 @@ void helper_svm_check_io(uint32_t port, uint32_t param, static inline void svm_save_seg(target_phys_addr_t addr, const SegmentCache *sc) { - stw_phys(addr + offsetof(struct vmcb_seg, selector), + stw_phys(addr + offsetof(struct vmcb_seg, selector), sc->selector); - stq_phys(addr + offsetof(struct vmcb_seg, base), + stq_phys(addr + offsetof(struct vmcb_seg, base), sc->base); - stl_phys(addr + offsetof(struct vmcb_seg, limit), + stl_phys(addr + offsetof(struct vmcb_seg, limit), sc->limit); - stw_phys(addr + offsetof(struct vmcb_seg, attrib), + stw_phys(addr + offsetof(struct vmcb_seg, attrib), ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00)); } - + static inline void svm_load_seg(target_phys_addr_t addr, SegmentCache *sc) { unsigned int flags; @@ -5098,7 +5120,7 @@ static inline void svm_load_seg(target_phys_addr_t addr, SegmentCache *sc) sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12); } -static inline void svm_load_seg_cache(target_phys_addr_t addr, +static inline void svm_load_seg_cache(target_phys_addr_t addr, CPUX86State *env, int seg_reg) { SegmentCache sc1, *sc = &sc1; @@ -5141,13 +5163,13 @@ void helper_vmrun(int aflag, int next_eip_addend) stq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer); stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags), compute_eflags()); - svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es), + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.es), &env->segs[R_ES]); - svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs), + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.cs), &env->segs[R_CS]); - svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss), + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ss), &env->segs[R_SS]); - svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds), + svm_save_seg(env->vm_hsave + offsetof(struct vmcb, save.ds), &env->segs[R_DS]); stq_phys(env->vm_hsave + offsetof(struct vmcb, save.rip), @@ -5191,7 +5213,7 @@ void helper_vmrun(int aflag, int next_eip_addend) env->hflags2 |= HF2_HIF_MASK; } - cpu_load_efer(env, + cpu_load_efer(env, ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.efer))); env->eflags = 0; 
load_eflags(ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.rflags)), @@ -5335,13 +5357,13 @@ void helper_vmsave(int aflag) addr, ldq_phys(addr + offsetof(struct vmcb, save.fs.base)), env->segs[R_FS].base); - svm_save_seg(addr + offsetof(struct vmcb, save.fs), + svm_save_seg(addr + offsetof(struct vmcb, save.fs), &env->segs[R_FS]); - svm_save_seg(addr + offsetof(struct vmcb, save.gs), + svm_save_seg(addr + offsetof(struct vmcb, save.gs), &env->segs[R_GS]); - svm_save_seg(addr + offsetof(struct vmcb, save.tr), + svm_save_seg(addr + offsetof(struct vmcb, save.tr), &env->tr); - svm_save_seg(addr + offsetof(struct vmcb, save.ldtr), + svm_save_seg(addr + offsetof(struct vmcb, save.ldtr), &env->ldt); #ifdef TARGET_X86_64 @@ -5379,7 +5401,7 @@ void helper_invlpga(int aflag) { target_ulong addr; helper_svm_check_intercept_param(SVM_EXIT_INVLPGA, 0); - + if (aflag == 2) addr = EAX; else @@ -5468,7 +5490,7 @@ void svm_check_intercept(CPUX86State *env1, uint32_t type) env = saved_env; } -void helper_svm_check_io(uint32_t port, uint32_t param, +void helper_svm_check_io(uint32_t port, uint32_t param, uint32_t next_eip_addend) { if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) { @@ -5477,7 +5499,7 @@ void helper_svm_check_io(uint32_t port, uint32_t param, uint16_t mask = (1 << ((param >> 4) & 7)) - 1; if(lduw_phys(addr + port / 8) & (mask << (port & 7))) { /* next EIP */ - stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), + stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), env->eip + next_eip_addend); helper_vmexit(SVM_EXIT_IOIO, param | (port << 16)); } @@ -5502,13 +5524,13 @@ void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) } /* Save the VM state in the vmcb */ - svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es), + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.es), &env->segs[R_ES]); - svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs), + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.cs), &env->segs[R_CS]); - svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss), + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ss), &env->segs[R_SS]); - svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds), + svm_save_seg(env->vm_vmcb + offsetof(struct vmcb, save.ds), &env->segs[R_DS]); stq_phys(env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base); @@ -5557,7 +5579,7 @@ void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) cpu_x86_update_cr3(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.cr3))); /* we need to set the efer after the crs so the hidden flags get set properly */ - cpu_load_efer(env, + cpu_load_efer(env, ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.efer))); env->eflags = 0; load_eflags(ldq_phys(env->vm_hsave + offsetof(struct vmcb, save.rflags)), diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 6217b83..552e0ae 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -965,7 +965,6 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest) #include "../../softmmu_defs.h" -#if !defined(CONFIG_QEMU_LDST_OPTIMIZATION) #ifdef CONFIG_TCG_PASS_AREG0 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ @@ -985,6 +984,8 @@ static const void *qemu_st_helpers[4] = { helper_stq_mmu, }; #else + +#ifndef CONFIG_QEMU_LDST_OPTIMIZATION /* legacy helper signature: __ld_mmu(target_ulong addr, int mmu_idx) */ static void *qemu_ld_helpers[4] = { @@ -1002,8 +1003,36 @@ static void *qemu_st_helpers[4] = { 
__stl_mmu, __stq_mmu, }; -#endif +#else +/* extended legacy helper signature: __ext_ld_mmu(target_ulong addr, int + mmu_idx, uintptr raddr) */ +static void *qemu_ld_helpers[4] = { + __ext_ldb_mmu, + __ext_ldw_mmu, + __ext_ldl_mmu, + __ext_ldq_mmu, +}; + +/* extended legacy helper signature: __ext_st_mmu(target_ulong addr, uintxx_t val, + int mmu_idx) */ +static void *qemu_st_helpers[4] = { + __ext_stb_mmu, + __ext_stw_mmu, + __ext_stl_mmu, + __ext_stq_mmu, +}; + +static void add_qemu_ldst_label(TCGContext *s, + int opc_ext, + int data_reg, + int data_reg2, + int addrlo_reg, + int addrhi_reg, + int mem_index, + uint8_t *raddr, + uint8_t **label_ptr); #endif /* !CONFIG_QEMU_LDST_OPTIMIZATION */ +#endif /* Perform the TLB load and compare. @@ -1063,19 +1092,36 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, tcg_out_mov(s, type, r0, addrlo); +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + if (!label_ptr) { + tcg_abort(); + } + label_ptr[0] = s->code_ptr; + s->code_ptr += 4; +#else /* jne label1 */ tcg_out8(s, OPC_JCC_short + JCC_JNE); label_ptr[0] = s->code_ptr; s->code_ptr++; +#endif if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { /* cmp 4(r1), addrhi */ tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4); +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) + /* jne slow_path */ + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); + label_ptr[1] = s->code_ptr; + s->code_ptr += 4; +#else /* jne label1 */ tcg_out8(s, OPC_JCC_short + JCC_JNE); label_ptr[1] = s->code_ptr; s->code_ptr++; +#endif } /* TLB Hit. */ @@ -1163,7 +1209,6 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, } } -#if !defined(CONFIG_QEMU_LDST_OPTIMIZATION) /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and EAX. It will be useful once fixed registers globals are less common. 
*/ @@ -1174,11 +1219,13 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int addrlo_idx; #if defined(CONFIG_SOFTMMU) int mem_index, s_bits; +#if !defined(CONFIG_QEMU_LDST_OPTIMIZATION) #if TCG_TARGET_REG_BITS == 64 int arg_idx; #else int stack_adjust; #endif +#endif /* !defined(CONFIG_QEMU_LDST_OPTIMIZATION) */ uint8_t *label_ptr[3]; #endif @@ -1200,6 +1247,18 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, tcg_out_qemu_ld_direct(s, data_reg, data_reg2, tcg_target_call_iarg_regs[0], 0, opc); +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) + /* helper stub will be jumped back here */ + add_qemu_ldst_label(s, + opc, + data_reg, + data_reg2, + args[addrlo_idx], + args[addrlo_idx + 1], + mem_index, + s->code_ptr, + label_ptr); +#else /* jmp label2 */ tcg_out8(s, OPC_JMP_short); label_ptr[2] = s->code_ptr; @@ -1295,6 +1354,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, /* label2: */ *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; +#endif /* defined(CONFIG_QEMU_LDST_OPTIMIZATION) */ #else { int32_t offset = GUEST_BASE; @@ -1321,7 +1381,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, } #endif } -#endif /* !defined(CONFIG_QEMU_LDST_OPTIMIZATION) */ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, int base, tcg_target_long ofs, int sizeop) @@ -1382,7 +1441,6 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, } } -#if !defined(CONFIG_QEMU_LDST_OPTIMIZATION) static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { @@ -1390,7 +1448,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int addrlo_idx; #if defined(CONFIG_SOFTMMU) int mem_index, s_bits; +#if !defined(CONFIG_QEMU_LDST_OPTIMIZATION) int stack_adjust; +#endif uint8_t *label_ptr[3]; #endif @@ -1412,6 +1472,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, tcg_out_qemu_st_direct(s, data_reg, data_reg2, tcg_target_call_iarg_regs[0], 0, opc); +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) + /* helper stub will be jumped back here */ + add_qemu_ldst_label(s, + opc | HL_ST_MASK, + data_reg, + data_reg2, + args[addrlo_idx], + args[addrlo_idx + 1], + mem_index, + s->code_ptr, + label_ptr); +#else /* jmp label2 */ tcg_out8(s, OPC_JMP_short); label_ptr[2] = s->code_ptr; @@ -1474,6 +1546,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, /* label2: */ *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; +#endif /* defined(CONFIG_QEMU_LDST_OPTIMIZATION) */ #else { int32_t offset = GUEST_BASE; @@ -1501,25 +1574,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, #endif } -#endif /* !defined(CONFIG_QEMU_LDST_OPTIMIZATION) */ - #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) /* optimization to reduce jump overheads for qemu_ld/st IRs */ -/* extened versions of MMU helpers */ -static void *qemu_ldext_helpers[4] = { - __ldextb_mmu, - __ldextw_mmu, - __ldextl_mmu, - __ldextq_mmu, -}; -static void *qemu_stext_helpers[4] = { - __stextb_mmu, - __stextw_mmu, - __stextl_mmu, - __stextq_mmu, -}; - /* * qemu_ld/st code generator call add_qemu_ldst_label, * so that slow case(TLB miss or I/O rw) is handled at the end of TB @@ -1532,7 +1589,7 @@ static void add_qemu_ldst_label(TCGContext *s, int addrhi_reg, int mem_index, uint8_t *raddr, - uint32_t **label_ptr) + uint8_t **label_ptr) { int idx; TCGLabelQemuLdst *label; @@ -1562,36 +1619,38 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label) int s_bits; int opc = label->opc_ext & HL_OPC_MASK; int 
mem_index = label->mem_index; - int data_reg = label->datalo_reg; - int data_reg2 = label->datahi_reg; - int addrlo_reg = label->addrlo_reg; - int addrhi_reg = label->addrhi_reg; - uint8_t *raddr = label->raddr; - uint32_t **label_ptr = &label->label_ptr[0]; #if TCG_TARGET_REG_BITS == 64 int arg_idx; #else int stack_adjust; #endif + int data_reg = label->datalo_reg; + int data_reg2 = label->datahi_reg; + int addrlo_reg = label->addrlo_reg; + int addrhi_reg = label->addrhi_reg; + uint8_t *raddr = label->raddr; + uint8_t **label_ptr = &label->label_ptr[0]; s_bits = opc & 3; - /* resolove label address */ - *label_ptr[0] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[0] - 4); + /* resolve label address */ + *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4); if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - *label_ptr[1] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[1] - 4); + *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4); } + /* extended helper signature: __ext_ld_mmu(target_ulong addr, int mmu_idx, + uintptr_t raddr) */ #if TCG_TARGET_REG_BITS == 32 - tcg_out_pushi(s, (tcg_target_long)(raddr - 1)); - tcg_out_pushi(s, mem_index); - stack_adjust = 8; + tcg_out_pushi(s, (int)(raddr - 1)); /* return address */ + stack_adjust = 4; + tcg_out_pushi(s, mem_index); /* mmu index */ + stack_adjust += 4; if (TARGET_LONG_BITS == 64) { tcg_out_push(s, addrhi_reg); - /* 4 bytes addrhi_reg and +4 bytes (raddr - 1) */ - stack_adjust += 8; + stack_adjust += 4; } - tcg_out_push(s, addrlo_reg); + tcg_out_push(s, addrlo_reg); /* guest addr */ stack_adjust += 4; #ifdef CONFIG_TCG_PASS_AREG0 tcg_out_push(s, TCG_AREG0); @@ -1600,9 +1659,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label) #else /* The first argument is already loaded with addrlo. 
*/ arg_idx = 1; - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx], + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++], mem_index); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, (tcg_target_long)(raddr - 1)); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++], + (int)(raddr - 1)); #ifdef CONFIG_TCG_PASS_AREG0 /* XXX/FIXME: suboptimal */ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], @@ -1616,7 +1676,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label) #endif #endif - tcg_out_calli(s, (tcg_target_long)qemu_ldext_helpers[s_bits]); + tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]); #if TCG_TARGET_REG_BITS == 32 if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) { @@ -1663,8 +1723,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label) default: tcg_abort(); } + /* jump back to original code */ - tcg_out_jmp(s, (tcg_target_long)raddr); + tcg_out_jmp(s, (tcg_target_long) raddr); } /* generates slow case of qemu_st at the end of TB */ @@ -1679,31 +1740,34 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label) int addrlo_reg = label->addrlo_reg; int addrhi_reg = label->addrhi_reg; uint8_t *raddr = label->raddr; - uint32_t **label_ptr = &label->label_ptr[0]; + uint8_t **label_ptr = &label->label_ptr[0]; s_bits = opc & 3; - /* resolove label address */ - *label_ptr[0] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[0] - 4); + /* resolve label address */ + *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4); if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - *label_ptr[1] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[1] - 4); + *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4); } + /* extended helper signature: __ext_st_mmu(target_ulong addr, uintxx_t val, + int mmu_idx, uintptr_t raddr) */ #if TCG_TARGET_REG_BITS == 32 - tcg_out_pushi(s, (tcg_target_long)(raddr - 1)); - tcg_out_pushi(s, mem_index); - stack_adjust = 8; + tcg_out_pushi(s, (int)(raddr - 1)); /* return address */ + stack_adjust = 4; + tcg_out_pushi(s, mem_index); /* mmu index */ + stack_adjust += 4; if (opc == 3) { tcg_out_push(s, data_reg2); stack_adjust += 4; } - tcg_out_push(s, data_reg); + tcg_out_push(s, data_reg); /* guest data */ stack_adjust += 4; if (TARGET_LONG_BITS == 64) { tcg_out_push(s, addrhi_reg); stack_adjust += 4; } - tcg_out_push(s, addrlo_reg); + tcg_out_push(s, addrlo_reg); /* guest addr */ stack_adjust += 4; #ifdef CONFIG_TCG_PASS_AREG0 tcg_out_push(s, TCG_AREG0); @@ -1713,10 +1777,8 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label) tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32), tcg_target_call_iarg_regs[1], data_reg); tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index); - /* return address should indicate qemu_st IR codes */ - /* stack growth: 1word * 64bit */ - tcg_out_pushi(s, (tcg_target_long)(raddr - 1)); - stack_adjust = 8; + tcg_out_movi(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], (int)(raddr - 1)); + stack_adjust = 0; #ifdef CONFIG_TCG_PASS_AREG0 /* XXX/FIXME: suboptimal */ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], @@ -1730,7 +1792,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label) #endif #endif - tcg_out_calli(s, (tcg_target_long)qemu_stext_helpers[s_bits]); + tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]); if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) { /* Pop and discard.
This is 2 bytes smaller than the add. */ @@ -1758,152 +1820,6 @@ void tcg_out_qemu_ldst_slow_path(TCGContext *s) } } } - -/* - * almost same with tcg_out_tlb_load except that forward jump target is different - * - */ - -static inline void tcg_out_tlb_load_opt(TCGContext *s, int addrlo_idx, - int mem_index, int s_bits, - const TCGArg *args, - uint32_t **label_ptr, int which) -{ - const int addrlo = args[addrlo_idx]; - const int r0 = tcg_target_call_iarg_regs[0]; - const int r1 = tcg_target_call_iarg_regs[1]; - TCGType type = TCG_TYPE_I32; - int rexw = 0; - - if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) { - type = TCG_TYPE_I64; - rexw = P_REXW; - } - - tcg_out_mov(s, type, r1, addrlo); - tcg_out_mov(s, type, r0, addrlo); - - tcg_out_shifti(s, SHIFT_SHR + rexw, r1, - TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - - tgen_arithi(s, ARITH_AND + rexw, r0, - TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); - tgen_arithi(s, ARITH_AND + rexw, r1, - (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); - - tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0, - offsetof(CPUArchState, tlb_table[mem_index][0]) - + which); - - /* cmp 0(r1), r0 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0); - - tcg_out_mov(s, type, r0, addrlo); - - /* jne label1; short jump is not enough in case of big TB */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - if (!label_ptr) { - tcg_abort(); - } - label_ptr[0] = (uint32_t *)s->code_ptr; - s->code_ptr += 4; - - if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - /* cmp 4(r1), addrhi */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4); - - /* jne label1 */ - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); - label_ptr[1] = (uint32_t *)s->code_ptr; - s->code_ptr += 4; - } - - /* TLB Hit. */ - - /* add addend(r1), r0 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1, - offsetof(CPUTLBEntry, addend) - which); -} - -/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and - EAX. It will be useful once fixed registers globals are less - common. */ -static void tcg_out_qemu_ld_opt(TCGContext *s, const TCGArg *args, - int opc) -{ - int data_reg, data_reg2 = 0; - int addrlo_idx; - int mem_index, s_bits; - uint32_t *label_ptr[2]; - - data_reg = args[0]; - label_ptr[1] = 0; - addrlo_idx = 1; - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { - data_reg2 = args[1]; - addrlo_idx = 2; - } - - mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; - s_bits = opc & 3; - - tcg_out_tlb_load_opt(s, addrlo_idx, mem_index, s_bits, args, - &label_ptr[0], offsetof(CPUTLBEntry, addr_read)); - - /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, - tcg_target_call_iarg_regs[0], 0, opc); - - /* helper stub will be jumped back here */ - add_qemu_ldst_label(s, - opc, - data_reg, - data_reg2, - args[addrlo_idx], - args[addrlo_idx + 1], - mem_index, - s->code_ptr, - label_ptr); - -} - -static void tcg_out_qemu_st_opt(TCGContext *s, const TCGArg *args, - int opc) -{ - int data_reg, data_reg2 = 0; - int addrlo_idx; - int mem_index, s_bits; - uint32_t *label_ptr[2]; - - data_reg = args[0]; - label_ptr[1] = 0; - addrlo_idx = 1; - if (TCG_TARGET_REG_BITS == 32 && opc == 3) { - data_reg2 = args[1]; - addrlo_idx = 2; - } - - mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)]; - s_bits = opc; - - tcg_out_tlb_load_opt(s, addrlo_idx, mem_index, s_bits, args, - &label_ptr[0], offsetof(CPUTLBEntry, addr_write)); - - /* TLB Hit. 
*/ - tcg_out_qemu_st_direct(s, data_reg, data_reg2, - tcg_target_call_iarg_regs[0], 0, opc); - - /* helper stub will be jumped back here */ - add_qemu_ldst_label(s, - opc | HL_ST_MASK, - data_reg, - data_reg2, - args[addrlo_idx], - args[addrlo_idx + 1], - mem_index, - s->code_ptr, - label_ptr); -} #endif /* defined(CONFIG_QEMU_LDST_OPTIMIZATION) */ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, @@ -2118,9 +2034,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_ext16u(s, args[0], args[1]); break; -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -#define tcg_out_qemu_ld(S, ARGS, OPC) tcg_out_qemu_ld_opt(S, ARGS, OPC) -#endif /* defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) */ case INDEX_op_qemu_ld8u: tcg_out_qemu_ld(s, args, 0); break; @@ -2143,9 +2056,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_qemu_ld(s, args, 3); break; -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -#define tcg_out_qemu_st(S, ARGS, OPC) tcg_out_qemu_st_opt(S, ARGS, OPC) -#endif /* defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) */ case INDEX_op_qemu_st8: tcg_out_qemu_st(s, args, 0); break; diff --git a/tcg/tcg.c b/tcg/tcg.c index 2aaf5f2..9e78eb5 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -303,14 +303,12 @@ void tcg_func_start(TCGContext *s) gen_opparam_ptr = gen_opparam_buf; #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) /* initialize qemu_ld/st labels which help to generate TLB miss case codes at the end of TB */ -#if defined(__i386__) || defined(__x86_64__) s->qemu_ldst_labels = tcg_malloc(sizeof(TCGLabelQemuLdst) * TCG_MAX_QEMU_LDST); if (!s->qemu_ldst_labels) { tcg_abort(); } s->nb_qemu_ldst_labels = 0; #endif -#endif } static inline void tcg_temp_alloc(TCGContext *s, int n) @@ -2174,11 +2172,9 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, } the_end: #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -#if defined(__i386__) || defined(__x86_64__) /* Generate MMU call helpers at the end of block (currently only for qemu_ld/st) */ tcg_out_qemu_ldst_slow_path(s); #endif -#endif return -1; } diff --git a/tcg/tcg.h b/tcg/tcg.h index 0588f75..0f2dbe2 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -188,7 +188,6 @@ typedef tcg_target_ulong TCGArg; */ #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -#if defined(__i386__) || defined(__x86_64__) /* Macros and structures for qemu_ld/st IR code optimization: It looks good for TCG_MAX_HELPER_LABELS to be half of OPC_BUF_SIZE in exec-all.h. 
*/ #define TCG_MAX_QEMU_LDST 320 @@ -207,9 +206,8 @@ typedef struct TCGLabelQemuLdst { int datahi_reg; /* reg index for the high word to be loaded or to be stored */ int mem_index; /* soft MMU memory index */ uint8_t *raddr; /* return address (located end of TB) */ - uint32_t *label_ptr[2]; /* label pointers to be updated */ + uint8_t *label_ptr[2]; /* label pointers to be updated */ } TCGLabelQemuLdst; -#endif #endif /* CONFIG_QEMU_LDST_OPTIMIZATION */ #ifdef CONFIG_DEBUG_TCG @@ -619,7 +617,7 @@ extern uint8_t code_gen_prologue[]; ((tcg_target_ulong (*)(void *, void *))code_gen_prologue)(env, tb_ptr) #endif -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) /* qemu_ld/st generation at the end of TB */ void tcg_out_qemu_ldst_slow_path(TCGContext *s); #endif diff --git a/tizen/qemu_configure_arm.sh b/tizen/qemu_configure_arm.sh index 0dab83e..742c5a6 100755 --- a/tizen/qemu_configure_arm.sh +++ b/tizen/qemu_configure_arm.sh @@ -14,10 +14,10 @@ exec ./configure \ --disable-vnc-tls \ --audio-card-list=ac97 \ --enable-opengles \ + --enable-ldst-optimization \ --enable-maru \ --disable-pie # --enable-mixemu \ -# --enable-ldst-optimization \ # --enable-gl # --enable-ffmpeg # --enable-v4l2 \ @@ -33,8 +33,8 @@ exec ./configure \ --disable-vnc-tls \ --audio-card-list=ac97 \ --enable-hax \ + --enable-ldst-optimization \ --enable-maru -# --enable-ldst-optimization \ # --enable-gl # --enable-ffmpeg # --disable-vnc-jpeg \
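
Note on the extended helpers introduced by this patch: the TLB-miss slow path is now emitted at the end of the TB, so GETPC() inside the helper would no longer identify the original qemu_ld/st site; the translated-code return address is therefore passed as an explicit argument, and GET_RET_ADDR() in softmmu_template.h expands to that argument instead of GETPC(), letting tlb_fill() attribute faults to the correct guest instruction. A minimal sketch contrasting the legacy and extended signatures for the 32-bit access case, taken from the softmmu_defs.h declarations above (illustrative only, not part of the diff):

    /* legacy helpers: return address recovered implicitly via GETPC() */
    uint32_t __ldl_mmu(target_ulong addr, int mmu_idx);
    void __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);

    /* extended helpers: the TCG backend passes the return address explicitly,
       so the slow path called from the end of the TB still reports the
       original qemu_ld/st location to tlb_fill() */
    uint32_t __ext_ldl_mmu(target_ulong addr, int mmu_idx, uintptr_t ra);
    void __ext_stl_mmu(target_ulong addr, uint32_t val, int mmu_idx, uintptr_t ra);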