/* Extended versions of MMU helpers for qemu_ld/st optimization.
   They take a return address argument because the callers' PCs are not where the helpers return to. */
#if defined(__i386__) || defined(__x86_64__)
-uint8_t __ldextb_mmu(target_ulong addr, int mmu_idx, void *ra);
-void __stextb_mmu(target_ulong addr, uint8_t val, int mmu_idx, void *ra);
-uint16_t __ldextw_mmu(target_ulong addr, int mmu_idx, void *ra);
-void __stextw_mmu(target_ulong addr, uint16_t val, int mmu_idx, void *ra);
-uint32_t __ldextl_mmu(target_ulong addr, int mmu_idx, void *ra);
-void __stextl_mmu(target_ulong addr, uint32_t val, int mmu_idx, void *ra);
-uint64_t __ldextq_mmu(target_ulong addr, int mmu_idx, void *ra);
-void __stextq_mmu(target_ulong addr, uint64_t val, int mmu_idx, void *ra);
+uint8_t __ldextb_mmu(target_ulong addr, int mmu_idx, uintptr_t retaddr);
+void __stextb_mmu(target_ulong addr, uint8_t val, int mmu_idx, uintptr_t retaddr);
+uint16_t __ldextw_mmu(target_ulong addr, int mmu_idx, uintptr_t retaddr);
+void __stextw_mmu(target_ulong addr, uint16_t val, int mmu_idx, uintptr_t retaddr);
+uint32_t __ldextl_mmu(target_ulong addr, int mmu_idx, uintptr_t retaddr);
+void __stextl_mmu(target_ulong addr, uint32_t val, int mmu_idx, uintptr_t retaddr);
+uint64_t __ldextq_mmu(target_ulong addr, int mmu_idx, uintptr_t retaddr);
+void __stextq_mmu(target_ulong addr, uint64_t val, int mmu_idx, uintptr_t retaddr);
#endif
#endif /* CONFIG_QEMU_LDST_OPTIMIZATION */
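As a side note, the helper names declared above are produced by token pasting in the template that follows. A standalone sketch of that expansion for the byte case; the glue()/xglue() pattern matches QEMU's, while the stand-in integer types and the empty body are assumptions for the example:

#include <stdint.h>
#include <stdio.h>

/* same two-level token-pasting pattern as QEMU's glue()/xglue() */
#define xglue(x, y) x ## y
#define glue(x, y)  xglue(x, y)

#define SUFFIX    b
#define MMUSUFFIX _mmu

/* glue(glue(__ldext, SUFFIX), MMUSUFFIX) expands to __ldextb_mmu;
   uint32_t stands in for target_ulong in this sketch. */
static uint8_t glue(glue(__ldext, SUFFIX), MMUSUFFIX)(uint32_t addr, int mmu_idx,
                                                      uintptr_t retaddr)
{
    (void)addr; (void)mmu_idx; (void)retaddr;
    return 0;
}

int main(void)
{
    /* the pasted name is usable directly */
    return __ldextb_mmu(0, 0, 0);
}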
/* Extended versions of MMU helpers for qemu_ld/st optimization.
   They take a return address argument because the callers' PCs are not where the helpers return to.
   !defined(SOFTMMU_CODE_ACCESS) suppresses warnings from exec.c */
#if defined(__i386__) || defined(__x86_64__)
-DATA_TYPE glue(glue(__ldext, SUFFIX), MMUSUFFIX)(target_ulong addr,
+DATA_TYPE glue(glue(__ldext, SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr,
int mmu_idx,
- void *ra)
+ uintptr_t retaddr)
{
DATA_TYPE res;
int index;
target_ulong tlb_addr;
target_phys_addr_t ioaddr;
- unsigned long addend;
- void *retaddr;
/* test if there is a match for an unaligned or IO access */
/* XXX: could be done more in a memory macro in a non-portable way */
/* IO access */
if ((addr & (DATA_SIZE - 1)) != 0)
goto do_unaligned_access;
- retaddr = ra;
ioaddr = env->iotlb[mmu_idx][index];
- res = glue(io_read, SUFFIX)(ioaddr, addr, retaddr);
+ res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
} else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
/* slow unaligned access (it spans two pages or IO) */
do_unaligned_access:
- retaddr = ra;
#ifdef ALIGNED_ONLY
- do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
#endif
- res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr,
+ res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
mmu_idx, retaddr);
} else {
/* unaligned/aligned access in the same page */
+ uintptr_t addend;
#ifdef ALIGNED_ONLY
if ((addr & (DATA_SIZE - 1)) != 0) {
- retaddr = ra;
- do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
}
#endif
+ addend = env->tlb_table[mmu_idx][index].addend;
+ res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t)
+ (addr + addend));
}
} else {
/* the page is not in the TLB : fill it */
- retaddr = ra;
#ifdef ALIGNED_ONLY
if ((addr & (DATA_SIZE - 1)) != 0)
- do_unaligned_access(addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
#endif
tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
goto redo;
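For clarity, the fast-path address arithmetic used in these helpers (TLB index derived from the virtual address, host pointer obtained as addr + addend) can be exercised in isolation. The page size and TLB size below are assumptions for the example, not QEMU's actual configuration:

#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS 12                      /* assumed 4 KiB pages */
#define TARGET_PAGE_SIZE (1u << TARGET_PAGE_BITS)
#define TARGET_PAGE_MASK (~(TARGET_PAGE_SIZE - 1))
#define CPU_TLB_SIZE     256                     /* assumed TLB entry count */

int main(void)
{
    static uint8_t host_page[TARGET_PAGE_SIZE];  /* pretend guest RAM page */
    uint32_t addr = 0x00403a28u;                 /* example guest address */

    /* TLB index, exactly as computed in the helpers */
    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);

    /* addend is chosen so that guest address + addend == host address */
    uintptr_t addend = (uintptr_t)host_page - (addr & TARGET_PAGE_MASK);
    uint8_t *host = (uint8_t *)(addr + addend);

    *host = 0x42;
    printf("index=%d, page offset=0x%x, byte=0x%02x\n",
           index, addr & ~TARGET_PAGE_MASK,
           (unsigned)host_page[addr & ~TARGET_PAGE_MASK]);
    return 0;
}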
#if defined(__i386__) || defined(__x86_64__)
/* Extended versions of MMU helpers for qemu_st IR optimization.
   They take a return address argument because the callers' PCs are not where the helpers return to. */
-void glue(glue(__stext, SUFFIX), MMUSUFFIX)(target_ulong addr,
+void glue(glue(__stext, SUFFIX), MMUSUFFIX)(ENV_PARAM target_ulong addr,
DATA_TYPE val,
int mmu_idx,
- void *ra)
+ uintptr_t retaddr)
{
target_phys_addr_t ioaddr;
- unsigned long addend;
target_ulong tlb_addr;
- void *retaddr;
int index;
index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
/* IO access */
if ((addr & (DATA_SIZE - 1)) != 0)
goto do_unaligned_access;
- retaddr = ra;
ioaddr = env->iotlb[mmu_idx][index];
- glue(io_write, SUFFIX)(ioaddr, val, addr, retaddr);
+ glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
} else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
do_unaligned_access:
- retaddr = ra;
#ifdef ALIGNED_ONLY
- do_unaligned_access(addr, 1, mmu_idx, retaddr);
+ do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
#endif
- glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val,
+ glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_VAR addr, val,
mmu_idx, retaddr);
} else {
/* aligned/unaligned access in the same page */
+ uintptr_t addend;
#ifdef ALIGNED_ONLY
if ((addr & (DATA_SIZE - 1)) != 0) {
- retaddr = ra;
-        do_unaligned_access(addr, 1, mmu_idx, retaddr);
+        do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
}
#endif
+ addend = env->tlb_table[mmu_idx][index].addend;
+ glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t)
+ (addr + addend), val);
}
} else {
/* the page is not in the TLB : fill it */
- retaddr = ra;
#ifdef ALIGNED_ONLY
if ((addr & (DATA_SIZE - 1)) != 0)
- do_unaligned_access(addr, 1, mmu_idx, retaddr);
+ do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
#endif
tlb_fill(env, addr, 1, mmu_idx, retaddr);
goto redo;
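The same slow-path condition appears in both the load and store helpers above. A standalone check of the arithmetic (page size assumed for the example) shows why it catches accesses whose last byte falls in the next page:

#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS 12                      /* assumed 4 KiB pages */
#define TARGET_PAGE_SIZE (1u << TARGET_PAGE_BITS)
#define TARGET_PAGE_MASK (~(TARGET_PAGE_SIZE - 1))

/* same expression as the helpers' unaligned/page-crossing test */
static int crosses_page(uint32_t addr, unsigned data_size)
{
    return ((addr & ~TARGET_PAGE_MASK) + data_size - 1) >= TARGET_PAGE_SIZE;
}

int main(void)
{
    /* a 4-byte access at offset 0xffc ends at 0xfff (same page);
       at offset 0xffd it ends at 0x1000, the first byte of the next page */
    printf("0xffc: %d, 0xffd: %d\n",
           crosses_page(0x00000ffcu, 4), crosses_page(0x00000ffdu, 4));
    return 0;
}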
/* generates slow case of qemu_ld at the end of TB */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
{
- int s_bits, arg_idx;
+ int s_bits;
int opc = label->opc_ext & HL_OPC_MASK;
int mem_index = label->mem_index;
int data_reg = label->datalo_reg;
int data_reg2 = label->datahi_reg;
+ int addrlo_reg = label->addrlo_reg;
int addrhi_reg = label->addrhi_reg;
uint8_t *raddr = label->raddr;
uint32_t **label_ptr = &label->label_ptr[0];
+#if TCG_TARGET_REG_BITS == 64
+ int arg_idx;
+#else
+ int stack_adjust;
+#endif
s_bits = opc & 3;
*label_ptr[1] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[1] - 4);
}
- /* 1st parameter(vaddr) has been alreay set in %eax */
- arg_idx = 1;
- if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
- addrhi_reg);
+#if TCG_TARGET_REG_BITS == 32
+ tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
+ tcg_out_pushi(s, mem_index);
+ stack_adjust = 8;
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_push(s, addrhi_reg);
+        /* addrhi_reg adds one more 4-byte word */
+        stack_adjust += 4;
}
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
+ tcg_out_push(s, addrlo_reg);
+ stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+ tcg_out_push(s, TCG_AREG0);
+ stack_adjust += 4;
+#endif
+#else
+ /* The first argument is already loaded with addrlo. */
+ arg_idx = 1;
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
mem_index);
- /* return address should indicate qemu_ld IR codes */
- if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
- /* 4 word parameters */
- tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
- } else {
- /* 3 word parameters */
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, (tcg_target_long)(raddr - 1));
- }
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, (tcg_target_long)(raddr - 1));
+#ifdef CONFIG_TCG_PASS_AREG0
+ /* XXX/FIXME: suboptimal */
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+ tcg_target_call_iarg_regs[2]);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+ tcg_target_call_iarg_regs[1]);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+ tcg_target_call_iarg_regs[0]);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+ TCG_AREG0);
+#endif
+#endif
+
tcg_out_calli(s, (tcg_target_long)qemu_ldext_helpers[s_bits]);
- if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+
+#if TCG_TARGET_REG_BITS == 32
+ if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
/* Pop and discard. This is 2 bytes smaller than the add. */
tcg_out_pop(s, TCG_REG_ECX);
+ } else if (stack_adjust != 0) {
+ tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
}
+#endif
switch(opc) {
case 0 | 4:
default:
tcg_abort();
}
-
/* jump back to original code */
- tcg_out_jmp(s, (tcg_target_long) raddr);
+ tcg_out_jmp(s, (tcg_target_long)raddr);
}
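On a 32-bit host the helper arguments are passed on the stack, so stack_adjust must end up equal to the total number of bytes pushed before the call; that is exactly what the add to TCG_REG_CALL_STACK (or the single pop) undoes afterwards. A standalone tally for one configuration; the 64-bit guest address and CONFIG_TCG_PASS_AREG0 settings are assumptions for the example:

#include <stdio.h>

int main(void)
{
    int target_long_bits = 64;   /* example: 64-bit guest addresses */
    int pass_areg0 = 1;          /* example: CONFIG_TCG_PASS_AREG0 enabled */
    int stack_adjust = 0;

    stack_adjust += 4;                       /* pushi (raddr - 1) */
    stack_adjust += 4;                       /* pushi mem_index   */
    if (target_long_bits == 64) {
        stack_adjust += 4;                   /* push addrhi_reg   */
    }
    stack_adjust += 4;                       /* push addrlo_reg   */
    if (pass_areg0) {
        stack_adjust += 4;                   /* push TCG_AREG0    */
    }

    printf("bytes pushed = %d\n", stack_adjust);   /* 20 in this configuration */
    return 0;
}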
/* generates slow case of qemu_st at the end of TB */
int mem_index = label->mem_index;
int data_reg = label->datalo_reg;
int data_reg2 = label->datahi_reg;
+ int addrlo_reg = label->addrlo_reg;
int addrhi_reg = label->addrhi_reg;
uint8_t *raddr = label->raddr;
uint32_t **label_ptr = &label->label_ptr[0];
*label_ptr[1] = (uint32_t)(s->code_ptr - (uint8_t *)label_ptr[1] - 4);
}
- /* 1st parameter(vaddr) has been already set */
- /* return address should indicate qemu_st IR codes */
- if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
- TCG_REG_RSI, data_reg);
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
- /* return address should indicate qemu_st IR codes */
- /* stack growth: 1word * 64bit */
- tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
- stack_adjust = 8;
- } else if (TARGET_LONG_BITS == 32) {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
- if (opc == 3) {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
- tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
- tcg_out_pushi(s, mem_index);
- stack_adjust = 8;
- } else {
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
- tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
- stack_adjust = 4;
- }
- } else {
- if (opc == 3) {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, addrhi_reg);
- tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
- tcg_out_pushi(s, mem_index);
- tcg_out_push(s, data_reg2);
- tcg_out_push(s, data_reg);
- stack_adjust = 16;
- } else {
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, addrhi_reg);
- switch(opc) {
- case 0:
- tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
- break;
- case 1:
- tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
- break;
- case 2:
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
- break;
- }
- tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
- tcg_out_pushi(s, mem_index);
- stack_adjust = 8;
- }
+#if TCG_TARGET_REG_BITS == 32
+ tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
+ tcg_out_pushi(s, mem_index);
+ stack_adjust = 8;
+ if (opc == 3) {
+ tcg_out_push(s, data_reg2);
+ stack_adjust += 4;
}
+ tcg_out_push(s, data_reg);
+ stack_adjust += 4;
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_push(s, addrhi_reg);
+ stack_adjust += 4;
+ }
+ tcg_out_push(s, addrlo_reg);
+ stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+ tcg_out_push(s, TCG_AREG0);
+ stack_adjust += 4;
+#endif
+#else
+ tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ tcg_target_call_iarg_regs[1], data_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
+    /* the return address should point at the qemu_st IR code */
+    /* stack growth: one 64-bit word */
+ tcg_out_pushi(s, (tcg_target_long)(raddr - 1));
+ stack_adjust = 8;
+#ifdef CONFIG_TCG_PASS_AREG0
+ /* XXX/FIXME: suboptimal */
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+ tcg_target_call_iarg_regs[2]);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+ tcg_target_call_iarg_regs[1]);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+ tcg_target_call_iarg_regs[0]);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+ TCG_AREG0);
+#endif
+#endif
tcg_out_calli(s, (tcg_target_long)qemu_stext_helpers[s_bits]);
/* Pop and discard. This is 2 bytes smaller than the add. */
tcg_out_pop(s, TCG_REG_ECX);
} else if (stack_adjust != 0) {
- tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
+ tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
}
/* jump back to original code */
uint32_t *label_ptr[2];
data_reg = args[0];
+ label_ptr[1] = 0;
addrlo_idx = 1;
if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
data_reg2 = args[1];
uint32_t *label_ptr[2];
data_reg = args[0];
+ label_ptr[1] = 0;
addrlo_idx = 1;
if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
data_reg2 = args[1];