From c45cb8bb89fc798489869982c4c463b26ce43d7b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 19 Sep 2014 13:49:15 -0700 Subject: [PATCH] tcg: Put opcodes in a linked list The previous setup required ops and args to be completely sequential, and was error prone when it came to both iteration and optimization. Reviewed-by: Bastian Koppelmann Signed-off-by: Richard Henderson --- include/exec/gen-icount.h | 22 ++- tcg/optimize.c | 286 ++++++++++++++--------------------- tcg/tcg-op.c | 190 ++++++++++++----------- tcg/tcg.c | 376 +++++++++++++++++++--------------------------- tcg/tcg.h | 58 ++++--- 5 files changed, 431 insertions(+), 501 deletions(-) diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index a37a61d..6e5b012 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -11,8 +11,8 @@ static int exitreq_label; static inline void gen_tb_start(TranslationBlock *tb) { - TCGv_i32 count; - TCGv_i32 flag; + TCGv_i32 count, flag, imm; + int i; exitreq_label = gen_new_label(); flag = tcg_temp_new_i32(); @@ -21,16 +21,25 @@ static inline void gen_tb_start(TranslationBlock *tb) tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label); tcg_temp_free_i32(flag); - if (!(tb->cflags & CF_USE_ICOUNT)) + if (!(tb->cflags & CF_USE_ICOUNT)) { return; + } icount_label = gen_new_label(); count = tcg_temp_local_new_i32(); tcg_gen_ld_i32(count, cpu_env, -ENV_OFFSET + offsetof(CPUState, icount_decr.u32)); + + imm = tcg_temp_new_i32(); + tcg_gen_movi_i32(imm, 0xdeadbeef); + /* This is a horrid hack to allow fixing up the value later. */ - icount_arg = tcg_ctx.gen_opparam_ptr + 1; - tcg_gen_subi_i32(count, count, 0xdeadbeef); + i = tcg_ctx.gen_last_op_idx; + i = tcg_ctx.gen_op_buf[i].args; + icount_arg = &tcg_ctx.gen_opparam_buf[i + 1]; + + tcg_gen_sub_i32(count, count, imm); + tcg_temp_free_i32(imm); tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label); tcg_gen_st16_i32(count, cpu_env, @@ -49,7 +58,8 @@ static void gen_tb_end(TranslationBlock *tb, int num_insns) tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_ICOUNT_EXPIRED); } - *tcg_ctx.gen_opc_ptr = INDEX_op_end; + /* Terminate the linked list. */ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; } static inline void gen_io_start(void) diff --git a/tcg/optimize.c b/tcg/optimize.c index 34ae3c2..f2b8acf 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -162,13 +162,13 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) return false; } -static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args, +static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, TCGOpcode old_op, TCGArg dst, TCGArg src) { TCGOpcode new_op = op_to_mov(old_op); tcg_target_ulong mask; - s->gen_opc_buf[op_index] = new_op; + op->opc = new_op; reset_temp(dst); mask = temps[src].mask; @@ -193,17 +193,17 @@ static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args, temps[src].next_copy = dst; } - gen_args[0] = dst; - gen_args[1] = src; + args[0] = dst; + args[1] = src; } -static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args, +static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, TCGOpcode old_op, TCGArg dst, TCGArg val) { TCGOpcode new_op = op_to_movi(old_op); tcg_target_ulong mask; - s->gen_opc_buf[op_index] = new_op; + op->opc = new_op; reset_temp(dst); temps[dst].state = TCG_TEMP_CONST; @@ -215,8 +215,8 @@ static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args, } temps[dst].mask = mask; - gen_args[0] = dst; - gen_args[1] = val; + args[0] = dst; + args[1] = val; } static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) @@ -533,11 +533,9 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) } /* Propagate constants and copies, fold constant expressions. */ -static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, - TCGArg *args, TCGOpDef *tcg_op_defs) +static void tcg_constant_folding(TCGContext *s) { - int nb_ops, op_index, nb_temps, nb_globals; - TCGArg *gen_args; + int oi, oi_next, nb_temps, nb_globals; /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. @@ -548,24 +546,23 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, nb_globals = s->nb_globals; reset_all_temps(nb_temps); - nb_ops = tcg_opc_ptr - s->gen_opc_buf; - gen_args = args; - for (op_index = 0; op_index < nb_ops; op_index++) { - TCGOpcode op = s->gen_opc_buf[op_index]; - const TCGOpDef *def = &tcg_op_defs[op]; + for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) { tcg_target_ulong mask, partmask, affected; - int nb_oargs, nb_iargs, nb_args, i; + int nb_oargs, nb_iargs, i; TCGArg tmp; - if (op == INDEX_op_call) { - *gen_args++ = tmp = *args++; - nb_oargs = tmp >> 16; - nb_iargs = tmp & 0xffff; - nb_args = nb_oargs + nb_iargs + def->nb_cargs; + TCGOp * const op = &s->gen_op_buf[oi]; + TCGArg * const args = &s->gen_opparam_buf[op->args]; + TCGOpcode opc = op->opc; + const TCGOpDef *def = &tcg_op_defs[opc]; + + oi_next = op->next; + if (opc == INDEX_op_call) { + nb_oargs = op->callo; + nb_iargs = op->calli; } else { nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; - nb_args = def->nb_args; } /* Do copy propagation */ @@ -576,7 +573,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } /* For commutative operations make constant second argument */ - switch (op) { + switch (opc) { CASE_OP_32_64(add): CASE_OP_32_64(mul): CASE_OP_32_64(and): @@ -634,7 +631,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, /* Simplify expressions for "shift/rot r, 0, a => movi r, 0", and "sub r, 0, a => neg r, a" case. */ - switch (op) { + switch (opc) { CASE_OP_32_64(shl): CASE_OP_32_64(shr): CASE_OP_32_64(sar): @@ -642,9 +639,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(rotr): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[1]].val == 0) { - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); - args += 3; - gen_args += 2; + tcg_opt_gen_movi(s, op, args, opc, args[0], 0); continue; } break; @@ -657,7 +652,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, /* Proceed with possible constant folding. */ break; } - if (op == INDEX_op_sub_i32) { + if (opc == INDEX_op_sub_i32) { neg_op = INDEX_op_neg_i32; have_neg = TCG_TARGET_HAS_neg_i32; } else { @@ -669,12 +664,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[1]].val == 0) { - s->gen_opc_buf[op_index] = neg_op; + op->opc = neg_op; reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[2]; - args += 3; - gen_args += 2; + args[1] = args[2]; continue; } } @@ -728,12 +720,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (!have_not) { break; } - s->gen_opc_buf[op_index] = not_op; + op->opc = not_op; reset_temp(args[0]); - gen_args[0] = args[0]; - gen_args[1] = args[i]; - args += 3; - gen_args += 2; + args[1] = args[i]; continue; } default: @@ -741,7 +730,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } /* Simplify expression for "op r, a, const => mov r, a" cases */ - switch (op) { + switch (opc) { CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(shl): @@ -769,12 +758,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, break; do_mov3: if (temps_are_copies(args[0], args[1])) { - s->gen_opc_buf[op_index] = INDEX_op_nop; + op->opc = INDEX_op_nop; } else { - tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); - gen_args += 2; + tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); } - args += 3; continue; default: break; @@ -784,7 +771,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, output argument is supported. */ mask = -1; affected = -1; - switch (op) { + switch (opc) { CASE_OP_32_64(ext8s): if ((temps[args[1]].mask & 0x80) != 0) { break; @@ -923,38 +910,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (partmask == 0) { assert(nb_oargs == 1); - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); - args += nb_args; - gen_args += 2; + tcg_opt_gen_movi(s, op, args, opc, args[0], 0); continue; } if (affected == 0) { assert(nb_oargs == 1); if (temps_are_copies(args[0], args[1])) { - s->gen_opc_buf[op_index] = INDEX_op_nop; + op->opc = INDEX_op_nop; } else if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); - gen_args += 2; + tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); } else { - tcg_opt_gen_movi(s, op_index, gen_args, op, + tcg_opt_gen_movi(s, op, args, opc, args[0], temps[args[1]].val); - gen_args += 2; } - args += nb_args; continue; } /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ - switch (op) { + switch (opc) { CASE_OP_32_64(and): CASE_OP_32_64(mul): CASE_OP_32_64(muluh): CASE_OP_32_64(mulsh): if ((temps[args[2]].state == TCG_TEMP_CONST && temps[args[2]].val == 0)) { - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); - args += 3; - gen_args += 2; + tcg_opt_gen_movi(s, op, args, opc, args[0], 0); continue; } break; @@ -963,18 +943,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } /* Simplify expression for "op r, a, a => mov r, a" cases */ - switch (op) { + switch (opc) { CASE_OP_32_64(or): CASE_OP_32_64(and): if (temps_are_copies(args[1], args[2])) { if (temps_are_copies(args[0], args[1])) { - s->gen_opc_buf[op_index] = INDEX_op_nop; + op->opc = INDEX_op_nop; } else { - tcg_opt_gen_mov(s, op_index, gen_args, op, - args[0], args[1]); - gen_args += 2; + tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); } - args += 3; continue; } break; @@ -983,14 +960,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } /* Simplify expression for "op r, a, a => movi r, 0" cases */ - switch (op) { + switch (opc) { CASE_OP_32_64(andc): CASE_OP_32_64(sub): CASE_OP_32_64(xor): if (temps_are_copies(args[1], args[2])) { - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0); - gen_args += 2; - args += 3; + tcg_opt_gen_movi(s, op, args, opc, args[0], 0); continue; } break; @@ -1001,17 +976,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, /* Propagate constants through copy operations and do constant folding. Constants will be substituted to arguments by register allocator where needed and possible. Also detect copies. */ - switch (op) { + switch (opc) { CASE_OP_32_64(mov): if (temps_are_copies(args[0], args[1])) { - args += 2; - s->gen_opc_buf[op_index] = INDEX_op_nop; + op->opc = INDEX_op_nop; break; } if (temps[args[1]].state != TCG_TEMP_CONST) { - tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]); - gen_args += 2; - args += 2; + tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]); break; } /* Source argument is constant. Rewrite the operation and @@ -1019,9 +991,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, args[1] = temps[args[1]].val; /* fallthrough */ CASE_OP_32_64(movi): - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], args[1]); - gen_args += 2; - args += 2; + tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]); break; CASE_OP_32_64(not): @@ -1033,20 +1003,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: if (temps[args[1]].state == TCG_TEMP_CONST) { - tmp = do_constant_folding(op, temps[args[1]].val, 0); - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); - gen_args += 2; - args += 2; + tmp = do_constant_folding(opc, temps[args[1]].val, 0); + tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); break; } goto do_default; case INDEX_op_trunc_shr_i32: if (temps[args[1]].state == TCG_TEMP_CONST) { - tmp = do_constant_folding(op, temps[args[1]].val, args[2]); - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); - gen_args += 2; - args += 3; + tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); + tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); break; } goto do_default; @@ -1075,11 +1041,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, CASE_OP_32_64(remu): if (temps[args[1]].state == TCG_TEMP_CONST && temps[args[2]].state == TCG_TEMP_CONST) { - tmp = do_constant_folding(op, temps[args[1]].val, + tmp = do_constant_folding(opc, temps[args[1]].val, temps[args[2]].val); - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); - gen_args += 2; - args += 3; + tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); break; } goto do_default; @@ -1089,54 +1053,44 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, && temps[args[2]].state == TCG_TEMP_CONST) { tmp = deposit64(temps[args[1]].val, args[3], args[4], temps[args[2]].val); - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); - gen_args += 2; - args += 5; + tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); break; } goto do_default; CASE_OP_32_64(setcond): - tmp = do_constant_folding_cond(op, args[1], args[2], args[3]); + tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]); if (tmp != 2) { - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); - gen_args += 2; - args += 4; + tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); break; } goto do_default; CASE_OP_32_64(brcond): - tmp = do_constant_folding_cond(op, args[0], args[1], args[2]); + tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]); if (tmp != 2) { if (tmp) { reset_all_temps(nb_temps); - s->gen_opc_buf[op_index] = INDEX_op_br; - gen_args[0] = args[3]; - gen_args += 1; + op->opc = INDEX_op_br; + args[0] = args[3]; } else { - s->gen_opc_buf[op_index] = INDEX_op_nop; + op->opc = INDEX_op_nop; } - args += 4; break; } goto do_default; CASE_OP_32_64(movcond): - tmp = do_constant_folding_cond(op, args[1], args[2], args[5]); + tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]); if (tmp != 2) { if (temps_are_copies(args[0], args[4-tmp])) { - s->gen_opc_buf[op_index] = INDEX_op_nop; + op->opc = INDEX_op_nop; } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) { - tcg_opt_gen_movi(s, op_index, gen_args, op, + tcg_opt_gen_movi(s, op, args, opc, args[0], temps[args[4-tmp]].val); - gen_args += 2; } else { - tcg_opt_gen_mov(s, op_index, gen_args, op, - args[0], args[4-tmp]); - gen_args += 2; + tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]); } - args += 6; break; } goto do_default; @@ -1154,24 +1108,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, uint64_t a = ((uint64_t)ah << 32) | al; uint64_t b = ((uint64_t)bh << 32) | bl; TCGArg rl, rh; + TCGOp *op2; + TCGArg *args2; - if (op == INDEX_op_add2_i32) { + if (opc == INDEX_op_add2_i32) { a += b; } else { a -= b; } /* We emit the extra nop when we emit the add2/sub2. */ - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); + op2 = &s->gen_op_buf[oi_next]; + assert(op2->opc == INDEX_op_nop); + + /* But we still have to allocate args for the op. */ + op2->args = s->gen_next_parm_idx; + s->gen_next_parm_idx += 2; + args2 = &s->gen_opparam_buf[op2->args]; rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op_index, &gen_args[0], - op, rl, (uint32_t)a); - tcg_opt_gen_movi(s, ++op_index, &gen_args[2], - op, rh, (uint32_t)(a >> 32)); - gen_args += 4; - args += 6; + tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a); + tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32)); + + /* We've done all we need to do with the movi. Skip it. */ + oi_next = op2->next; break; } goto do_default; @@ -1183,18 +1144,25 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, uint32_t b = temps[args[3]].val; uint64_t r = (uint64_t)a * b; TCGArg rl, rh; + TCGOp *op2; + TCGArg *args2; /* We emit the extra nop when we emit the mulu2. */ - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); + op2 = &s->gen_op_buf[oi_next]; + assert(op2->opc == INDEX_op_nop); + + /* But we still have to allocate args for the op. */ + op2->args = s->gen_next_parm_idx; + s->gen_next_parm_idx += 2; + args2 = &s->gen_opparam_buf[op2->args]; rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op_index, &gen_args[0], - op, rl, (uint32_t)r); - tcg_opt_gen_movi(s, ++op_index, &gen_args[2], - op, rh, (uint32_t)(r >> 32)); - gen_args += 4; - args += 4; + tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r); + tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32)); + + /* We've done all we need to do with the movi. Skip it. */ + oi_next = op2->next; break; } goto do_default; @@ -1205,12 +1173,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, if (tmp) { do_brcond_true: reset_all_temps(nb_temps); - s->gen_opc_buf[op_index] = INDEX_op_br; - gen_args[0] = args[5]; - gen_args += 1; + op->opc = INDEX_op_br; + args[0] = args[5]; } else { do_brcond_false: - s->gen_opc_buf[op_index] = INDEX_op_nop; + op->opc = INDEX_op_nop; } } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) && temps[args[2]].state == TCG_TEMP_CONST @@ -1221,12 +1188,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, vs the high word of the input. */ do_brcond_high: reset_all_temps(nb_temps); - s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; - gen_args[0] = args[1]; - gen_args[1] = args[3]; - gen_args[2] = args[4]; - gen_args[3] = args[5]; - gen_args += 4; + op->opc = INDEX_op_brcond_i32; + args[0] = args[1]; + args[1] = args[3]; + args[2] = args[4]; + args[3] = args[5]; } else if (args[4] == TCG_COND_EQ) { /* Simplify EQ comparisons where one of the pairs can be simplified. */ @@ -1246,12 +1212,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } do_brcond_low: reset_all_temps(nb_temps); - s->gen_opc_buf[op_index] = INDEX_op_brcond_i32; - gen_args[0] = args[0]; - gen_args[1] = args[2]; - gen_args[2] = args[4]; - gen_args[3] = args[5]; - gen_args += 4; + op->opc = INDEX_op_brcond_i32; + args[1] = args[2]; + args[2] = args[4]; + args[3] = args[5]; } else if (args[4] == TCG_COND_NE) { /* Simplify NE comparisons where one of the pairs can be simplified. */ @@ -1273,15 +1237,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } else { goto do_default; } - args += 6; break; case INDEX_op_setcond2_i32: tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]); if (tmp != 2) { do_setcond_const: - tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp); - gen_args += 2; + tcg_opt_gen_movi(s, op, args, opc, args[0], tmp); } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) && temps[args[3]].state == TCG_TEMP_CONST && temps[args[4]].state == TCG_TEMP_CONST @@ -1290,14 +1252,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ do_setcond_high: - s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; reset_temp(args[0]); temps[args[0]].mask = 1; - gen_args[0] = args[0]; - gen_args[1] = args[2]; - gen_args[2] = args[4]; - gen_args[3] = args[5]; - gen_args += 4; + op->opc = INDEX_op_setcond_i32; + args[1] = args[2]; + args[2] = args[4]; + args[3] = args[5]; } else if (args[5] == TCG_COND_EQ) { /* Simplify EQ comparisons where one of the pairs can be simplified. */ @@ -1318,12 +1278,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, do_setcond_low: reset_temp(args[0]); temps[args[0]].mask = 1; - s->gen_opc_buf[op_index] = INDEX_op_setcond_i32; - gen_args[0] = args[0]; - gen_args[1] = args[1]; - gen_args[2] = args[3]; - gen_args[3] = args[5]; - gen_args += 4; + op->opc = INDEX_op_setcond_i32; + args[2] = args[3]; + args[3] = args[5]; } else if (args[5] == TCG_COND_NE) { /* Simplify NE comparisons where one of the pairs can be simplified. */ @@ -1345,7 +1302,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } else { goto do_default; } - args += 6; break; case INDEX_op_call: @@ -1377,22 +1333,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, } } } - for (i = 0; i < nb_args; i++) { - gen_args[i] = args[i]; - } - args += nb_args; - gen_args += nb_args; break; } } - - return gen_args; } -TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, - TCGArg *args, TCGOpDef *tcg_op_defs) +void tcg_optimize(TCGContext *s) { - TCGArg *res; - res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs); - return res; + tcg_constant_folding(s); } diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 5305f1d..cbaa15c 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -35,100 +35,116 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); #define TCGV_HIGH TCGV_HIGH_link_error #endif +/* Note that this is optimized for sequential allocation during translate. + Up to and including filling in the forward link immediately. We'll do + proper termination of the end of the list after we finish translation. */ + +static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) +{ + int oi = ctx->gen_next_op_idx; + int ni = oi + 1; + int pi = oi - 1; + + tcg_debug_assert(oi < OPC_BUF_SIZE); + ctx->gen_last_op_idx = oi; + ctx->gen_next_op_idx = ni; + + ctx->gen_op_buf[oi] = (TCGOp){ + .opc = opc, + .args = args, + .prev = pi, + .next = ni + }; +} + void tcg_gen_op0(TCGContext *ctx, TCGOpcode opc) { - *ctx->gen_opc_ptr++ = opc; + tcg_emit_op(ctx, opc, -1); } void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; + tcg_debug_assert(pi + 1 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 1; + ctx->gen_opparam_buf[pi] = a1; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 1; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op2(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; + tcg_debug_assert(pi + 2 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 2; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 2; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op3(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; - opp[2] = a3; + tcg_debug_assert(pi + 3 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 3; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; + ctx->gen_opparam_buf[pi + 2] = a3; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 3; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op4(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; - opp[2] = a3; - opp[3] = a4; + tcg_debug_assert(pi + 4 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 4; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; + ctx->gen_opparam_buf[pi + 2] = a3; + ctx->gen_opparam_buf[pi + 3] = a4; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 4; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op5(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; - opp[2] = a3; - opp[3] = a4; - opp[4] = a5; + tcg_debug_assert(pi + 5 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 5; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; + ctx->gen_opparam_buf[pi + 2] = a3; + ctx->gen_opparam_buf[pi + 3] = a4; + ctx->gen_opparam_buf[pi + 4] = a5; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 5; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; - opp[2] = a3; - opp[3] = a4; - opp[4] = a5; - opp[5] = a6; + tcg_debug_assert(pi + 6 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 6; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; + ctx->gen_opparam_buf[pi + 2] = a3; + ctx->gen_opparam_buf[pi + 3] = a4; + ctx->gen_opparam_buf[pi + 4] = a5; + ctx->gen_opparam_buf[pi + 5] = a6; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 6; + tcg_emit_op(ctx, opc, pi); } /* 32 bit ops */ @@ -1862,53 +1878,57 @@ static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) return op; } -static inline void tcg_add_param_i32(TCGv_i32 val) -{ - *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val); -} - -static inline void tcg_add_param_i64(TCGv_i64 val) +static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr, + TCGMemOp memop, TCGArg idx) { +#if TARGET_LONG_BITS == 32 + tcg_gen_op4ii_i32(opc, val, addr, memop, idx); +#else if (TCG_TARGET_REG_BITS == 32) { - *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_LOW(val)); - *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_HIGH(val)); + tcg_gen_op5ii_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), + memop, idx); } else { - *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val); + tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_I64(addr), + memop, idx); } +#endif } +static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr, + TCGMemOp memop, TCGArg idx) +{ #if TARGET_LONG_BITS == 32 -# define tcg_add_param_tl tcg_add_param_i32 + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op5ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), + addr, memop, idx); + } else { + tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_I32(addr), + memop, idx); + } #else -# define tcg_add_param_tl tcg_add_param_i64 + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op6ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), + TCGV_LOW(addr), TCGV_HIGH(addr), memop, idx); + } else { + tcg_gen_op4ii_i64(opc, val, addr, memop, idx); + } #endif +} void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { memop = tcg_canonicalize_memop(memop, 0, 0); - - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32; - tcg_add_param_i32(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; + gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx); } void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { memop = tcg_canonicalize_memop(memop, 0, 1); - - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32; - tcg_add_param_i32(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; + gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx); } void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) { - memop = tcg_canonicalize_memop(memop, 1, 0); - if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop); if (memop & MO_SIGN) { @@ -1919,25 +1939,17 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) return; } - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64; - tcg_add_param_i64(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; + memop = tcg_canonicalize_memop(memop, 1, 0); + gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx); } void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) { - memop = tcg_canonicalize_memop(memop, 1, 1); - if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop); return; } - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64; - tcg_add_param_i64(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; + memop = tcg_canonicalize_memop(memop, 1, 1); + gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 3470500..ee041b9 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -407,7 +407,6 @@ void tcg_func_start(TCGContext *s) /* No temps have been previously allocated for size or locality. */ memset(s->free_temps, 0, sizeof(s->free_temps)); - s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS); s->nb_labels = 0; s->current_frame_offset = s->frame_start; @@ -415,8 +414,10 @@ void tcg_func_start(TCGContext *s) s->goto_tb_issue_mask = 0; #endif - s->gen_opc_ptr = s->gen_opc_buf; - s->gen_opparam_ptr = s->gen_opparam_buf; + s->gen_first_op_idx = 0; + s->gen_last_op_idx = -1; + s->gen_next_op_idx = 0; + s->gen_next_parm_idx = 0; s->be = tcg_malloc(sizeof(TCGBackendData)); } @@ -703,9 +704,8 @@ int tcg_check_temp_count(void) void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, int nargs, TCGArg *args) { - int i, real_args, nb_rets; + int i, real_args, nb_rets, pi, pi_first; unsigned sizemask, flags; - TCGArg *nparam; TCGHelperInfo *info; info = g_hash_table_lookup(s->helpers, (gpointer)func); @@ -758,8 +758,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, } #endif /* TCG_TARGET_EXTEND_ARGS */ - *s->gen_opc_ptr++ = INDEX_op_call; - nparam = s->gen_opparam_ptr++; + pi_first = pi = s->gen_next_parm_idx; if (ret != TCG_CALL_DUMMY_ARG) { #if defined(__sparc__) && !defined(__arch64__) \ && !defined(CONFIG_TCG_INTERPRETER) @@ -769,25 +768,25 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, two return temporaries, and reassemble below. */ retl = tcg_temp_new_i64(); reth = tcg_temp_new_i64(); - *s->gen_opparam_ptr++ = GET_TCGV_I64(reth); - *s->gen_opparam_ptr++ = GET_TCGV_I64(retl); + s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth); + s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl); nb_rets = 2; } else { - *s->gen_opparam_ptr++ = ret; + s->gen_opparam_buf[pi++] = ret; nb_rets = 1; } #else if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) { #ifdef HOST_WORDS_BIGENDIAN - *s->gen_opparam_ptr++ = ret + 1; - *s->gen_opparam_ptr++ = ret; + s->gen_opparam_buf[pi++] = ret + 1; + s->gen_opparam_buf[pi++] = ret; #else - *s->gen_opparam_ptr++ = ret; - *s->gen_opparam_ptr++ = ret + 1; + s->gen_opparam_buf[pi++] = ret; + s->gen_opparam_buf[pi++] = ret + 1; #endif nb_rets = 2; } else { - *s->gen_opparam_ptr++ = ret; + s->gen_opparam_buf[pi++] = ret; nb_rets = 1; } #endif @@ -801,7 +800,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, #ifdef TCG_TARGET_CALL_ALIGN_ARGS /* some targets want aligned 64 bit args */ if (real_args & 1) { - *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG; + s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG; real_args++; } #endif @@ -816,26 +815,42 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, have to get more complicated to differentiate between stack arguments and register arguments. */ #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) - *s->gen_opparam_ptr++ = args[i] + 1; - *s->gen_opparam_ptr++ = args[i]; + s->gen_opparam_buf[pi++] = args[i] + 1; + s->gen_opparam_buf[pi++] = args[i]; #else - *s->gen_opparam_ptr++ = args[i]; - *s->gen_opparam_ptr++ = args[i] + 1; + s->gen_opparam_buf[pi++] = args[i]; + s->gen_opparam_buf[pi++] = args[i] + 1; #endif real_args += 2; continue; } - *s->gen_opparam_ptr++ = args[i]; + s->gen_opparam_buf[pi++] = args[i]; real_args++; } - *s->gen_opparam_ptr++ = (uintptr_t)func; - *s->gen_opparam_ptr++ = flags; + s->gen_opparam_buf[pi++] = (uintptr_t)func; + s->gen_opparam_buf[pi++] = flags; + + i = s->gen_next_op_idx; + tcg_debug_assert(i < OPC_BUF_SIZE); + tcg_debug_assert(pi <= OPPARAM_BUF_SIZE); + + /* Set links for sequential allocation during translation. */ + s->gen_op_buf[i] = (TCGOp){ + .opc = INDEX_op_call, + .callo = nb_rets, + .calli = real_args, + .args = pi_first, + .prev = i - 1, + .next = i + 1 + }; - *nparam = (nb_rets << 16) | real_args; + /* Make sure the calli field didn't overflow. */ + tcg_debug_assert(s->gen_op_buf[i].calli == real_args); - /* total parameters, needed to go backward in the instruction stream */ - *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3; + s->gen_last_op_idx = i; + s->gen_next_op_idx = i + 1; + s->gen_next_parm_idx = pi; #if defined(__sparc__) && !defined(__arch64__) \ && !defined(CONFIG_TCG_INTERPRETER) @@ -972,20 +987,21 @@ static const char * const ldst_name[] = void tcg_dump_ops(TCGContext *s) { - const uint16_t *opc_ptr; - const TCGArg *args; - TCGArg arg; - TCGOpcode c; - int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn; - const TCGOpDef *def; char buf[128]; + TCGOp *op; + int oi; - first_insn = 1; - opc_ptr = s->gen_opc_buf; - args = s->gen_opparam_buf; - while (opc_ptr < s->gen_opc_ptr) { - c = *opc_ptr++; + for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { + int i, k, nb_oargs, nb_iargs, nb_cargs; + const TCGOpDef *def; + const TCGArg *args; + TCGOpcode c; + + op = &s->gen_op_buf[oi]; + c = op->opc; def = &tcg_op_defs[c]; + args = &s->gen_opparam_buf[op->args]; + if (c == INDEX_op_debug_insn_start) { uint64_t pc; #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS @@ -993,21 +1009,14 @@ void tcg_dump_ops(TCGContext *s) #else pc = args[0]; #endif - if (!first_insn) { + if (oi != s->gen_first_op_idx) { qemu_log("\n"); } qemu_log(" ---- 0x%" PRIx64, pc); - first_insn = 0; - nb_oargs = def->nb_oargs; - nb_iargs = def->nb_iargs; - nb_cargs = def->nb_cargs; } else if (c == INDEX_op_call) { - TCGArg arg; - /* variable number of arguments */ - arg = *args++; - nb_oargs = arg >> 16; - nb_iargs = arg & 0xffff; + nb_oargs = op->callo; + nb_iargs = op->calli; nb_cargs = def->nb_cargs; /* function name, flags, out args */ @@ -1028,26 +1037,20 @@ void tcg_dump_ops(TCGContext *s) } } else { qemu_log(" %s ", def->name); - if (c == INDEX_op_nopn) { - /* variable number of arguments */ - nb_cargs = *args; - nb_oargs = 0; - nb_iargs = 0; - } else { - nb_oargs = def->nb_oargs; - nb_iargs = def->nb_iargs; - nb_cargs = def->nb_cargs; - } - + + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + k = 0; - for(i = 0; i < nb_oargs; i++) { + for (i = 0; i < nb_oargs; i++) { if (k != 0) { qemu_log(","); } qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++])); } - for(i = 0; i < nb_iargs; i++) { + for (i = 0; i < nb_iargs; i++) { if (k != 0) { qemu_log(","); } @@ -1085,16 +1088,14 @@ void tcg_dump_ops(TCGContext *s) i = 0; break; } - for(; i < nb_cargs; i++) { + for (; i < nb_cargs; i++) { if (k != 0) { qemu_log(","); } - arg = args[k++]; - qemu_log("$0x%" TCG_PRIlx, arg); + qemu_log("$0x%" TCG_PRIlx, args[k++]); } } qemu_log("\n"); - args += nb_iargs + nb_oargs + nb_cargs; } } @@ -1244,20 +1245,6 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs) } #ifdef USE_LIVENESS_ANALYSIS - -/* set a nop for an operation using 'nb_args' */ -static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr, - TCGArg *args, int nb_args) -{ - if (nb_args == 0) { - *opc_ptr = INDEX_op_nop; - } else { - *opc_ptr = INDEX_op_nopn; - args[0] = nb_args; - args[nb_args - 1] = nb_args; - } -} - /* liveness analysis: end of function: all temps are dead, and globals should be in memory. */ static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps, @@ -1287,19 +1274,10 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, temporaries are removed. */ static void tcg_liveness_analysis(TCGContext *s) { - int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops; - TCGOpcode op, op_new, op_new2; - TCGArg *args, arg; - const TCGOpDef *def; uint8_t *dead_temps, *mem_temps; - uint16_t dead_args; - uint8_t sync_args; - bool have_op_new2; - - s->gen_opc_ptr++; /* skip end */ - - nb_ops = s->gen_opc_ptr - s->gen_opc_buf; + int oi, oi_prev, nb_ops; + nb_ops = s->gen_next_op_idx; s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t)); @@ -1307,25 +1285,31 @@ static void tcg_liveness_analysis(TCGContext *s) mem_temps = tcg_malloc(s->nb_temps); tcg_la_func_end(s, dead_temps, mem_temps); - args = s->gen_opparam_ptr; - op_index = nb_ops - 1; - while (op_index >= 0) { - op = s->gen_opc_buf[op_index]; - def = &tcg_op_defs[op]; - switch(op) { + for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) { + int i, nb_iargs, nb_oargs; + TCGOpcode opc_new, opc_new2; + bool have_opc_new2; + uint16_t dead_args; + uint8_t sync_args; + TCGArg arg; + + TCGOp * const op = &s->gen_op_buf[oi]; + TCGArg * const args = &s->gen_opparam_buf[op->args]; + TCGOpcode opc = op->opc; + const TCGOpDef *def = &tcg_op_defs[opc]; + + oi_prev = op->prev; + + switch (opc) { case INDEX_op_call: { int call_flags; - nb_args = args[-1]; - args -= nb_args; - arg = *args++; - nb_iargs = arg & 0xffff; - nb_oargs = arg >> 16; + nb_oargs = op->callo; + nb_iargs = op->calli; call_flags = args[nb_oargs + nb_iargs + 1]; - /* pure functions can be removed if their result is not - used */ + /* pure functions can be removed if their result is unused */ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { for (i = 0; i < nb_oargs; i++) { arg = args[i]; @@ -1333,8 +1317,7 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_not_remove_call; } } - tcg_set_nop(s, s->gen_opc_buf + op_index, - args - 1, nb_args); + goto do_remove; } else { do_not_remove_call: @@ -1373,41 +1356,33 @@ static void tcg_liveness_analysis(TCGContext *s) dead_temps[arg] = 0; } } - s->op_dead_args[op_index] = dead_args; - s->op_sync_args[op_index] = sync_args; + s->op_dead_args[oi] = dead_args; + s->op_sync_args[oi] = sync_args; } - args--; } break; case INDEX_op_debug_insn_start: - args -= def->nb_args; - break; - case INDEX_op_nopn: - nb_args = args[-1]; - args -= nb_args; + case INDEX_op_nop: + case INDEX_op_end: break; case INDEX_op_discard: - args--; /* mark the temporary as dead */ dead_temps[args[0]] = 1; mem_temps[args[0]] = 0; break; - case INDEX_op_end: - break; case INDEX_op_add2_i32: - op_new = INDEX_op_add_i32; + opc_new = INDEX_op_add_i32; goto do_addsub2; case INDEX_op_sub2_i32: - op_new = INDEX_op_sub_i32; + opc_new = INDEX_op_sub_i32; goto do_addsub2; case INDEX_op_add2_i64: - op_new = INDEX_op_add_i64; + opc_new = INDEX_op_add_i64; goto do_addsub2; case INDEX_op_sub2_i64: - op_new = INDEX_op_sub_i64; + opc_new = INDEX_op_sub_i64; do_addsub2: - args -= 6; nb_iargs = 4; nb_oargs = 2; /* Test if the high part of the operation is dead, but not @@ -1418,12 +1393,11 @@ static void tcg_liveness_analysis(TCGContext *s) if (dead_temps[args[0]] && !mem_temps[args[0]]) { goto do_remove; } - /* Create the single operation plus nop. */ - s->gen_opc_buf[op_index] = op = op_new; + /* Replace the opcode and adjust the args in place, + leaving 3 unused args at the end. */ + op->opc = opc = opc_new; args[1] = args[2]; args[2] = args[4]; - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); - tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3); /* Fall through and mark the single-word operation live. */ nb_iargs = 2; nb_oargs = 1; @@ -1431,27 +1405,26 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_not_remove; case INDEX_op_mulu2_i32: - op_new = INDEX_op_mul_i32; - op_new2 = INDEX_op_muluh_i32; - have_op_new2 = TCG_TARGET_HAS_muluh_i32; + opc_new = INDEX_op_mul_i32; + opc_new2 = INDEX_op_muluh_i32; + have_opc_new2 = TCG_TARGET_HAS_muluh_i32; goto do_mul2; case INDEX_op_muls2_i32: - op_new = INDEX_op_mul_i32; - op_new2 = INDEX_op_mulsh_i32; - have_op_new2 = TCG_TARGET_HAS_mulsh_i32; + opc_new = INDEX_op_mul_i32; + opc_new2 = INDEX_op_mulsh_i32; + have_opc_new2 = TCG_TARGET_HAS_mulsh_i32; goto do_mul2; case INDEX_op_mulu2_i64: - op_new = INDEX_op_mul_i64; - op_new2 = INDEX_op_muluh_i64; - have_op_new2 = TCG_TARGET_HAS_muluh_i64; + opc_new = INDEX_op_mul_i64; + opc_new2 = INDEX_op_muluh_i64; + have_opc_new2 = TCG_TARGET_HAS_muluh_i64; goto do_mul2; case INDEX_op_muls2_i64: - op_new = INDEX_op_mul_i64; - op_new2 = INDEX_op_mulsh_i64; - have_op_new2 = TCG_TARGET_HAS_mulsh_i64; + opc_new = INDEX_op_mul_i64; + opc_new2 = INDEX_op_mulsh_i64; + have_opc_new2 = TCG_TARGET_HAS_mulsh_i64; goto do_mul2; do_mul2: - args -= 4; nb_iargs = 2; nb_oargs = 2; if (dead_temps[args[1]] && !mem_temps[args[1]]) { @@ -1460,28 +1433,25 @@ static void tcg_liveness_analysis(TCGContext *s) goto do_remove; } /* The high part of the operation is dead; generate the low. */ - s->gen_opc_buf[op_index] = op = op_new; + op->opc = opc = opc_new; args[1] = args[2]; args[2] = args[3]; - } else if (have_op_new2 && dead_temps[args[0]] + } else if (have_opc_new2 && dead_temps[args[0]] && !mem_temps[args[0]]) { - /* The low part of the operation is dead; generate the high. */ - s->gen_opc_buf[op_index] = op = op_new2; + /* The low part of the operation is dead; generate the high. */ + op->opc = opc = opc_new2; args[0] = args[1]; args[1] = args[2]; args[2] = args[3]; } else { goto do_not_remove; } - assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop); - tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1); /* Mark the single-word operation live. */ nb_oargs = 1; goto do_not_remove; default: /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */ - args -= def->nb_args; nb_iargs = def->nb_iargs; nb_oargs = def->nb_oargs; @@ -1489,24 +1459,23 @@ static void tcg_liveness_analysis(TCGContext *s) its outputs are dead. We assume that nb_oargs == 0 implies side effects */ if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { - for(i = 0; i < nb_oargs; i++) { + for (i = 0; i < nb_oargs; i++) { arg = args[i]; if (!dead_temps[arg] || mem_temps[arg]) { goto do_not_remove; } } do_remove: - tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args); + op->opc = INDEX_op_nop; #ifdef CONFIG_PROFILER s->del_op_count++; #endif } else { do_not_remove: - /* output args are dead */ dead_args = 0; sync_args = 0; - for(i = 0; i < nb_oargs; i++) { + for (i = 0; i < nb_oargs; i++) { arg = args[i]; if (dead_temps[arg]) { dead_args |= (1 << i); @@ -1527,23 +1496,18 @@ static void tcg_liveness_analysis(TCGContext *s) } /* input args are live */ - for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { arg = args[i]; if (dead_temps[arg]) { dead_args |= (1 << i); } dead_temps[arg] = 0; } - s->op_dead_args[op_index] = dead_args; - s->op_sync_args[op_index] = sync_args; + s->op_dead_args[oi] = dead_args; + s->op_sync_args[oi] = sync_args; } break; } - op_index--; - } - - if (args != s->gen_opparam_buf) { - tcg_abort(); } } #else @@ -2110,11 +2074,11 @@ static void tcg_reg_alloc_op(TCGContext *s, #define STACK_DIR(x) (x) #endif -static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, - TCGOpcode opc, const TCGArg *args, - uint16_t dead_args, uint8_t sync_args) +static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs, + const TCGArg * const args, uint16_t dead_args, + uint8_t sync_args) { - int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params; + int flags, nb_regs, i, reg; TCGArg arg; TCGTemp *ts; intptr_t stack_offset; @@ -2123,22 +2087,16 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, int allocate_args; TCGRegSet allocated_regs; - arg = *args++; - - nb_oargs = arg >> 16; - nb_iargs = arg & 0xffff; - nb_params = nb_iargs; - func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs]; flags = args[nb_oargs + nb_iargs + 1]; nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs); - if (nb_regs > nb_params) { - nb_regs = nb_params; + if (nb_regs > nb_iargs) { + nb_regs = nb_iargs; } /* assign stack slots first */ - call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long); + call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long); call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & ~(TCG_TARGET_STACK_ALIGN - 1); allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE); @@ -2149,7 +2107,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, } stack_offset = TCG_TARGET_CALL_STACK_OFFSET; - for(i = nb_regs; i < nb_params; i++) { + for(i = nb_regs; i < nb_iargs; i++) { arg = args[nb_oargs + i]; #ifdef TCG_TARGET_STACK_GROWSUP stack_offset -= sizeof(tcg_target_long); @@ -2256,8 +2214,6 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, } } } - - return nb_iargs + nb_oargs + def->nb_cargs + 1; } #ifdef CONFIG_PROFILER @@ -2285,10 +2241,7 @@ static inline int tcg_gen_code_common(TCGContext *s, tcg_insn_unit *gen_code_buf, long search_pc) { - TCGOpcode opc; - int op_index; - const TCGOpDef *def; - const TCGArg *args; + int oi, oi_next; #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) { @@ -2303,8 +2256,7 @@ static inline int tcg_gen_code_common(TCGContext *s, #endif #ifdef USE_TCG_OPTIMIZATIONS - s->gen_opparam_ptr = - tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs); + tcg_optimize(s); #endif #ifdef CONFIG_PROFILER @@ -2333,42 +2285,31 @@ static inline int tcg_gen_code_common(TCGContext *s, tcg_out_tb_init(s); - args = s->gen_opparam_buf; - op_index = 0; + for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) { + TCGOp * const op = &s->gen_op_buf[oi]; + TCGArg * const args = &s->gen_opparam_buf[op->args]; + TCGOpcode opc = op->opc; + const TCGOpDef *def = &tcg_op_defs[opc]; + uint16_t dead_args = s->op_dead_args[oi]; + uint8_t sync_args = s->op_sync_args[oi]; - for(;;) { - opc = s->gen_opc_buf[op_index]; + oi_next = op->next; #ifdef CONFIG_PROFILER tcg_table_op_count[opc]++; #endif - def = &tcg_op_defs[opc]; -#if 0 - printf("%s: %d %d %d\n", def->name, - def->nb_oargs, def->nb_iargs, def->nb_cargs); - // dump_regs(s); -#endif - switch(opc) { + + switch (opc) { case INDEX_op_mov_i32: case INDEX_op_mov_i64: - tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index], - s->op_sync_args[op_index]); + tcg_reg_alloc_mov(s, def, args, dead_args, sync_args); break; case INDEX_op_movi_i32: case INDEX_op_movi_i64: - tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index], - s->op_sync_args[op_index]); + tcg_reg_alloc_movi(s, args, dead_args, sync_args); break; case INDEX_op_debug_insn_start: - /* debug instruction */ - break; case INDEX_op_nop: - case INDEX_op_nop1: - case INDEX_op_nop2: - case INDEX_op_nop3: break; - case INDEX_op_nopn: - args += args[0]; - goto next; case INDEX_op_discard: temp_dead(s, args[0]); break; @@ -2377,12 +2318,9 @@ static inline int tcg_gen_code_common(TCGContext *s, tcg_out_label(s, args[0], s->code_ptr); break; case INDEX_op_call: - args += tcg_reg_alloc_call(s, def, opc, args, - s->op_dead_args[op_index], - s->op_sync_args[op_index]); - goto next; - case INDEX_op_end: - goto the_end; + tcg_reg_alloc_call(s, op->callo, op->calli, args, + dead_args, sync_args); + break; default: /* Sanity check that we've not introduced any unhandled opcodes. */ if (def->flags & TCG_OPF_NOT_PRESENT) { @@ -2391,21 +2329,17 @@ static inline int tcg_gen_code_common(TCGContext *s, /* Note: in order to speed up the code, it would be much faster to have specialized register allocator functions for some common argument patterns */ - tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index], - s->op_sync_args[op_index]); + tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args); break; } - args += def->nb_args; - next: if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) { - return op_index; + return oi; } - op_index++; #ifndef NDEBUG check_regs(s); #endif } - the_end: + /* Generate TB finalization at the end of block */ tcg_out_tb_finalize(s); return -1; @@ -2416,14 +2350,18 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf) #ifdef CONFIG_PROFILER { int n; - n = (s->gen_opc_ptr - s->gen_opc_buf); + + n = s->gen_last_op_idx + 1; s->op_count += n; - if (n > s->op_count_max) + if (n > s->op_count_max) { s->op_count_max = n; + } - s->temp_count += s->nb_temps; - if (s->nb_temps > s->temp_count_max) - s->temp_count_max = s->nb_temps; + n = s->nb_temps; + s->temp_count += n; + if (n > s->temp_count_max) { + s->temp_count_max = n; + } } #endif diff --git a/tcg/tcg.h b/tcg/tcg.h index 95f1aad..596e30a 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -448,10 +448,28 @@ typedef struct TCGTempSet { unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; } TCGTempSet; +typedef struct TCGOp { + TCGOpcode opc : 8; + + /* The number of out and in parameter for a call. */ + unsigned callo : 2; + unsigned calli : 6; + + /* Index of the arguments for this op, or -1 for zero-operand ops. */ + signed args : 16; + + /* Index of the prex/next op, or -1 for the end of the list. */ + signed prev : 16; + signed next : 16; +} TCGOp; + +QEMU_BUILD_BUG_ON(NB_OPS > 0xff); +QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff); +QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff); + struct TCGContext { uint8_t *pool_cur, *pool_end; TCGPool *pool_first, *pool_current, *pool_first_large; - TCGLabel *labels; int nb_labels; int nb_globals; int nb_temps; @@ -469,9 +487,6 @@ struct TCGContext { corresponding output argument needs to be sync to memory. */ - /* tells in which temporary a given register is. It does not take - into account fixed registers */ - int reg_to_temp[TCG_TARGET_NB_REGS]; TCGRegSet reserved_regs; intptr_t current_frame_offset; intptr_t frame_start; @@ -479,8 +494,6 @@ struct TCGContext { int frame_reg; tcg_insn_unit *code_ptr; - TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ - TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; GHashTable *helpers; @@ -508,14 +521,10 @@ struct TCGContext { int goto_tb_issue_mask; #endif - uint16_t gen_opc_buf[OPC_BUF_SIZE]; - TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE]; - - uint16_t *gen_opc_ptr; - TCGArg *gen_opparam_ptr; - target_ulong gen_opc_pc[OPC_BUF_SIZE]; - uint16_t gen_opc_icount[OPC_BUF_SIZE]; - uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; + int gen_first_op_idx; + int gen_last_op_idx; + int gen_next_op_idx; + int gen_next_parm_idx; /* Code generation. Note that we specifically do not use tcg_insn_unit here, because there's too much arithmetic throughout that relies @@ -533,6 +542,22 @@ struct TCGContext { /* The TCGBackendData structure is private to tcg-target.c. */ struct TCGBackendData *be; + + TCGTempSet free_temps[TCG_TYPE_COUNT * 2]; + TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */ + + /* tells in which temporary a given register is. It does not take + into account fixed registers */ + int reg_to_temp[TCG_TARGET_NB_REGS]; + + TCGOp gen_op_buf[OPC_BUF_SIZE]; + TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE]; + + target_ulong gen_opc_pc[OPC_BUF_SIZE]; + uint16_t gen_opc_icount[OPC_BUF_SIZE]; + uint8_t gen_opc_instr_start[OPC_BUF_SIZE]; + + TCGLabel labels[TCG_MAX_LABELS]; }; extern TCGContext tcg_ctx; @@ -540,7 +565,7 @@ extern TCGContext tcg_ctx; /* The number of opcodes emitted so far. */ static inline int tcg_op_buf_count(void) { - return tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf; + return tcg_ctx.gen_next_op_idx; } /* Test for whether to terminate the TB for using too many opcodes. */ @@ -718,8 +743,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs); void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret, int nargs, TCGArg *args); -TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args, - TCGOpDef *tcg_op_def); +void tcg_optimize(TCGContext *s); /* only used for debugging purposes */ void tcg_dump_ops(TCGContext *s); -- 2.7.4