From fca8a500d519a56abeaedf8073167a61d3c6b9c4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 1 Sep 2015 19:11:45 -0700 Subject: [PATCH] tcg: Save insn data and use it in cpu_restore_state_from_tb We can now restore state without retranslation. Reviewed-by: Aurelien Jarno Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 1 + tcg/tcg.c | 40 ++++++++----- tcg/tcg.h | 4 +- translate-all.c | 150 +++++++++++++++++++++++++++++++++++------------- 4 files changed, 140 insertions(+), 55 deletions(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 6a69802..402dd87 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -199,6 +199,7 @@ struct TranslationBlock { #define CF_USE_ICOUNT 0x20000 void *tc_ptr; /* pointer to the translated code */ + uint8_t *tc_search; /* pointer to search data */ /* next matching tb for physical address. */ struct TranslationBlock *phys_hash_next; /* original tb when cflags has CF_NOCACHE */ diff --git a/tcg/tcg.c b/tcg/tcg.c index bdb83d9..40f24de 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -2294,7 +2294,7 @@ static inline int tcg_gen_code_common(TCGContext *s, tcg_insn_unit *gen_code_buf, long search_pc) { - int i, oi, oi_next; + int i, oi, oi_next, num_insns; #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) { @@ -2338,6 +2338,7 @@ static inline int tcg_gen_code_common(TCGContext *s, tcg_out_tb_init(s); + num_insns = -1; for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) { TCGOp * const op = &s->gen_op_buf[oi]; TCGArg * const args = &s->gen_opparam_buf[op->args]; @@ -2361,6 +2362,10 @@ static inline int tcg_gen_code_common(TCGContext *s, tcg_reg_alloc_movi(s, args, dead_args, sync_args); break; case INDEX_op_insn_start: + if (num_insns >= 0) { + s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); + } + num_insns++; for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { target_ulong a; #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS @@ -2368,7 +2373,7 @@ static inline int tcg_gen_code_common(TCGContext *s, #else a = args[i]; #endif - s->gen_opc_data[i] = a; + s->gen_insn_data[num_insns][i] = a; } break; case INDEX_op_discard: @@ -2400,6 +2405,8 @@ static inline int tcg_gen_code_common(TCGContext *s, check_regs(s); #endif } + tcg_debug_assert(num_insns >= 0); + s->gen_insn_end_off[num_insns] = tcg_current_code_size(s); /* Generate TB finalization at the end of block */ tcg_out_tb_finalize(s); @@ -2448,24 +2455,26 @@ int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf, void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) { TCGContext *s = &tcg_ctx; - int64_t tot; + int64_t tb_count = s->tb_count; + int64_t tb_div_count = tb_count ? tb_count : 1; + int64_t tot = s->interm_time + s->code_time; - tot = s->interm_time + s->code_time; cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", tot, tot / 2.4e9); cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n", - s->tb_count, - s->tb_count1 - s->tb_count, - s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0); + tb_count, s->tb_count1 - tb_count, + (double)(s->tb_count1 - s->tb_count) + / (s->tb_count1 ? s->tb_count1 : 1) * 100.0); cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n", - s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max); + (double)s->op_count / tb_div_count, s->op_count_max); cpu_fprintf(f, "deleted ops/TB %0.2f\n", - s->tb_count ? - (double)s->del_op_count / s->tb_count : 0); + (double)s->del_op_count / tb_div_count); cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n", - s->tb_count ? - (double)s->temp_count / s->tb_count : 0, - s->temp_count_max); + (double)s->temp_count / tb_div_count, s->temp_count_max); + cpu_fprintf(f, "avg host code/TB %0.1f\n", + (double)s->code_out_len / tb_div_count); + cpu_fprintf(f, "avg search data/TB %0.1f\n", + (double)s->search_out_len / tb_div_count); cpu_fprintf(f, "cycles/op %0.1f\n", s->op_count ? (double)tot / s->op_count : 0); @@ -2473,8 +2482,11 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) s->code_in_len ? (double)tot / s->code_in_len : 0); cpu_fprintf(f, "cycles/out byte %0.1f\n", s->code_out_len ? (double)tot / s->code_out_len : 0); - if (tot == 0) + cpu_fprintf(f, "cycles/search byte %0.1f\n", + s->search_out_len ? (double)tot / s->search_out_len : 0); + if (tot == 0) { tot = 1; + } cpu_fprintf(f, " gen_interm time %0.1f%%\n", (double)s->interm_time / tot * 100.0); cpu_fprintf(f, " gen_code time %0.1f%%\n", diff --git a/tcg/tcg.h b/tcg/tcg.h index 8fd1252..df499c6 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -532,6 +532,7 @@ struct TCGContext { int64_t del_op_count; int64_t code_in_len; int64_t code_out_len; + int64_t search_out_len; int64_t interm_time; int64_t code_time; int64_t la_time; @@ -581,7 +582,8 @@ struct TCGContext { uint16_t gen_opc_icount[OPC_BUF_SIZE]; uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; - target_ulong gen_opc_data[TARGET_INSN_START_WORDS]; + uint16_t gen_insn_end_off[TCG_MAX_INSNS]; + target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS]; }; extern TCGContext tcg_ctx; diff --git a/translate-all.c b/translate-all.c index 9f801ae..3454f4e 100644 --- a/translate-all.c +++ b/translate-all.c @@ -168,61 +168,128 @@ void cpu_gen_init(void) tcg_context_init(&tcg_ctx); } +/* Encode VAL as a signed leb128 sequence at P. + Return P incremented past the encoded value. */ +static uint8_t *encode_sleb128(uint8_t *p, target_long val) +{ + int more, byte; + + do { + byte = val & 0x7f; + val >>= 7; + more = !((val == 0 && (byte & 0x40) == 0) + || (val == -1 && (byte & 0x40) != 0)); + if (more) { + byte |= 0x80; + } + *p++ = byte; + } while (more); + + return p; +} + +/* Decode a signed leb128 sequence at *PP; increment *PP past the + decoded value. Return the decoded value. */ +static target_long decode_sleb128(uint8_t **pp) +{ + uint8_t *p = *pp; + target_long val = 0; + int byte, shift = 0; + + do { + byte = *p++; + val |= (target_ulong)(byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + if (shift < TARGET_LONG_BITS && (byte & 0x40)) { + val |= -(target_ulong)1 << shift; + } + + *pp = p; + return val; +} + +/* Encode the data collected about the instructions while compiling TB. + Place the data at BLOCK, and return the number of bytes consumed. + + The logical table consisits of TARGET_INSN_START_WORDS target_ulong's, + which come from the target's insn_start data, followed by a uintptr_t + which comes from the host pc of the end of the code implementing the insn. + + Each line of the table is encoded as sleb128 deltas from the previous + line. The seed for the first line is { tb->pc, 0..., tb->tc_ptr }. + That is, the first column is seeded with the guest pc, the last column + with the host pc, and the middle columns with zeros. */ + +static int encode_search(TranslationBlock *tb, uint8_t *block) +{ + uint8_t *p = block; + int i, j, n; + + tb->tc_search = block; + + for (i = 0, n = tb->icount; i < n; ++i) { + target_ulong prev; + + for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { + if (i == 0) { + prev = (j == 0 ? tb->pc : 0); + } else { + prev = tcg_ctx.gen_insn_data[i - 1][j]; + } + p = encode_sleb128(p, tcg_ctx.gen_insn_data[i][j] - prev); + } + prev = (i == 0 ? 0 : tcg_ctx.gen_insn_end_off[i - 1]); + p = encode_sleb128(p, tcg_ctx.gen_insn_end_off[i] - prev); + } + + return p - block; +} + /* The cpu state corresponding to 'searched_pc' is restored. */ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc) { + target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc }; + uintptr_t host_pc = (uintptr_t)tb->tc_ptr; CPUArchState *env = cpu->env_ptr; - TCGContext *s = &tcg_ctx; - int j; - uintptr_t tc_ptr; + uint8_t *p = tb->tc_search; + int i, j, num_insns = tb->icount; #ifdef CONFIG_PROFILER - int64_t ti; + int64_t ti = profile_getclock(); #endif -#ifdef CONFIG_PROFILER - ti = profile_getclock(); -#endif - tcg_func_start(s); + if (searched_pc < host_pc) { + return -1; + } - gen_intermediate_code_pc(env, tb); + /* Reconstruct the stored insn data while looking for the point at + which the end of the insn exceeds the searched_pc. */ + for (i = 0; i < num_insns; ++i) { + for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { + data[j] += decode_sleb128(&p); + } + host_pc += decode_sleb128(&p); + if (host_pc > searched_pc) { + goto found; + } + } + return -1; + found: if (tb->cflags & CF_USE_ICOUNT) { assert(use_icount); /* Reset the cycle counter to the start of the block. */ - cpu->icount_decr.u16.low += tb->icount; + cpu->icount_decr.u16.low += num_insns; /* Clear the IO flag. */ cpu->can_do_io = 0; } - - /* find opc index corresponding to search_pc */ - tc_ptr = (uintptr_t)tb->tc_ptr; - if (searched_pc < tc_ptr) - return -1; - - s->tb_next_offset = tb->tb_next_offset; -#ifdef USE_DIRECT_JUMP - s->tb_jmp_offset = tb->tb_jmp_offset; - s->tb_next = NULL; -#else - s->tb_jmp_offset = NULL; - s->tb_next = tb->tb_next; -#endif - j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr, - searched_pc - tc_ptr); - if (j < 0) - return -1; - /* now find start of instruction before */ - while (s->gen_opc_instr_start[j] == 0) { - j--; - } - cpu->icount_decr.u16.low -= s->gen_opc_icount[j]; - - restore_state_to_opc(env, tb, s->gen_opc_data); + cpu->icount_decr.u16.low -= i; + restore_state_to_opc(env, tb, data); #ifdef CONFIG_PROFILER - s->restore_time += profile_getclock() - ti; - s->restore_count++; + tcg_ctx.restore_time += profile_getclock() - ti; + tcg_ctx.restore_count++; #endif return 0; } @@ -969,7 +1036,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb_page_addr_t phys_pc, phys_page2; target_ulong virt_page2; tcg_insn_unit *gen_code_buf; - int gen_code_size; + int gen_code_size, search_size; #ifdef CONFIG_PROFILER int64_t ti; #endif @@ -1025,11 +1092,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #endif gen_code_size = tcg_gen_code(&tcg_ctx, gen_code_buf); + search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); #ifdef CONFIG_PROFILER tcg_ctx.code_time += profile_getclock(); tcg_ctx.code_in_len += tb->size; tcg_ctx.code_out_len += gen_code_size; + tcg_ctx.search_out_len += search_size; #endif #ifdef DEBUG_DISAS @@ -1041,8 +1110,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } #endif - tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)gen_code_buf + - gen_code_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); + tcg_ctx.code_gen_ptr = (void *) + ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, + CODE_GEN_ALIGN); /* check next page if needed */ virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; -- 2.7.4