From 5bfd75a35c11dd3aa61c73d0d2cd88137c31519c Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 2 Oct 2015 22:25:28 +0000
Subject: [PATCH] tcg/ppc: Revise goto_tb implementation

Restrict the size of code_gen_buffer to 2GB on ppc64, which lets us
assert that everything is reachable with addis+addi from tb_ret_addr.
This lets us use a max of 4 insns for goto_tb instead of 7.

Emit the indirect branch portion of goto_tb up front, which means we
only have to update two insns to update any link.  With a 64-bit
store, we can update the link atomically, which may be required in
future.

Signed-off-by: Richard Henderson
---
 tcg/ppc/tcg-target.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
 translate-all.c      |  2 ++
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index fd7a3e0..cee13e0 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1239,11 +1239,36 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
 
 void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
-    TCGContext s;
+    tcg_insn_unit i1, i2;
+    uint64_t pair;
+    intptr_t diff = addr - jmp_addr;
 
-    s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
-    tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
-    flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
+    if (in_range_b(diff)) {
+        i1 = B | (diff & 0x3fffffc);
+        i2 = NOP;
+    } else if (USE_REG_RA) {
+        intptr_t lo, hi;
+        diff = addr - (uintptr_t)tb_ret_addr;
+        lo = (int16_t)diff;
+        hi = (int32_t)(diff - lo);
+        assert(diff == hi + lo);
+        i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
+        i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
+    } else {
+        assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
+        i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
+        i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
+    }
+#ifdef HOST_WORDS_BIGENDIAN
+    pair = (uint64_t)i1 << 32 | i2;
+#else
+    pair = (uint64_t)i2 << 32 | i1;
+#endif
+
+    /* ??? __atomic_store_8, presuming there's some way to do that
+       for 32-bit, otherwise this is good enough for 64-bit.  */
+    *(uint64_t *)jmp_addr = pair;
+    flush_icache_range(jmp_addr, jmp_addr + 8);
 }
 
 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
@@ -1869,14 +1894,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_b(s, 0, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_offset) {
-            /* Direct jump method.  */
-            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
-            s->code_ptr += 7;
-        } else {
-            /* Indirect jump method.  */
-            tcg_abort();
+        tcg_debug_assert(s->tb_jmp_offset);
+        /* Direct jump.  Ensure the next insns are 8-byte aligned.  */
+        if ((uintptr_t)s->code_ptr & 7) {
+            tcg_out32(s, NOP);
         }
+        s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+        /* To be replaced by either a branch+nop or a load into TMP1.  */
+        s->code_ptr += 2;
+        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+        tcg_out32(s, BCCTR | BO_ALWAYS);
         s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
diff --git a/translate-all.c b/translate-all.c
index 333eba4..20ce40e 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -468,6 +468,8 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__sparc__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__powerpc64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__aarch64__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
 #elif defined(__arm__)
-- 
2.7.4
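
As a footnote for reviewers, here is a standalone sketch of the
addis+addi displacement split used in ppc_tb_set_jmp_target above.
It is illustration only, not part of the patch, and the displacement
value is made up; the 2GB cap on code_gen_buffer is what guarantees
a real diff fits in 32 bits.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical displacement from tb_ret_addr to the target.  */
    int32_t diff = 0x12348765;

    /* addi sign-extends its 16-bit immediate, so take the low part
       as the sign-extended low 16 bits of diff...  */
    int32_t lo = (int16_t)diff;
    /* ...and let the high part absorb the borrow: hi is then a
       multiple of 0x10000, and addis adds exactly hi via its
       shifted immediate, hi >> 16.  */
    int32_t hi = diff - lo;

    assert((hi & 0xffff) == 0);
    assert(hi + lo == diff);
    printf("diff=0x%x -> addis imm 0x%x, addi imm %d\n",
           (unsigned)diff, (unsigned)hi >> 16, lo);
    return 0;
}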
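
In the same spirit, a sketch of the atomic two-insn update the commit
message refers to, assuming GCC-style __atomic builtins are available;
the function name below is a stand-in, not the patch's:

#include <stdint.h>

/* Write both patchable insns with one aligned 8-byte store, so a cpu
   executing the translated code observes either the old pair or the
   new pair, never a mix.  The insn at the lower address must land in
   the high half of the doubleword on a big-endian host.  */
static void store_insn_pair(uint64_t *jmp_addr, uint32_t i1, uint32_t i2)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    uint64_t pair = (uint64_t)i1 << 32 | i2;
#else
    uint64_t pair = (uint64_t)i2 << 32 | i1;
#endif
    __atomic_store_n(jmp_addr, pair, __ATOMIC_RELAXED);
}

This is also why INDEX_op_goto_tb pads with a NOP first: the 8-byte
store is only guaranteed atomic when the insn pair is 8-byte aligned.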