From 56565b7bba54b8298d2c14c66bb87c59930b09ee Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 25 Feb 2020 10:44:26 -0800 Subject: [PATCH] freedreno/ir3: update SFU delay 1) empirically, 10 seems like a more accurate # than 4 2) push "soft" delay handling into ir3_delayslots(), as we should also be using it to calculate the costs that the schedulers use Signed-off-by: Rob Clark Part-of: --- src/freedreno/ir3/ir3.h | 2 +- src/freedreno/ir3/ir3_delay.c | 26 ++++++++++++++++---------- src/freedreno/ir3/ir3_depth.c | 2 +- src/freedreno/ir3/ir3_postsched.c | 2 +- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 21fd8c6..b66d8e2 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1157,7 +1157,7 @@ void ir3_print_instr(struct ir3_instruction *instr); /* delay calculation: */ int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n); + struct ir3_instruction *consumer, unsigned n, bool soft); unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr, bool soft, bool pred); void ir3_remove_nops(struct ir3 *ir); diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 0b796a4..5839128 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -69,7 +69,7 @@ ignore_dep(struct ir3_instruction *assigner, */ int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n) + struct ir3_instruction *consumer, unsigned n, bool soft) { if (ignore_dep(assigner, consumer, n)) return 0; @@ -85,6 +85,20 @@ ir3_delayslots(struct ir3_instruction *assigner, if (writes_addr(assigner)) return 6; + /* On a6xx, it takes the number of delay slots to get a SFU result + * back (ie. using nop's instead of (ss)) is: + * + * 8 - single warp + * 9 - two warps + * 10 - four warps + * + * and so on. Not quite sure where it tapers out (ie. 
how many + * warps share an SFU unit). But 10 seems like a reasonable # + * to choose: + */ + if (soft && is_sfu(assigner)) + return 10; + /* handled via sync flags: */ if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) return 0; @@ -195,15 +209,7 @@ delay_calc_srcn(struct ir3_block *block, delay = MAX2(delay, d); } } else { - if (soft) { - if (is_sfu(assigner)) { - delay = 4; - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } + delay = ir3_delayslots(assigner, consumer, srcn, soft); delay -= distance(block, assigner, delay, pred); } diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c index 135d436..6bb9468 100644 --- a/src/freedreno/ir3/ir3_depth.c +++ b/src/freedreno/ir3/ir3_depth.c @@ -89,7 +89,7 @@ ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep) if (i == 0) continue; - sd = ir3_delayslots(src, instr, i) + src->depth; + sd = ir3_delayslots(src, instr, i, true) + src->depth; sd += boost; instr->depth = MAX2(instr->depth, sd); diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c index 4290e88..47a8e52 100644 --- a/src/freedreno/ir3/ir3_postsched.c +++ b/src/freedreno/ir3/ir3_postsched.c @@ -380,7 +380,7 @@ calculate_deps(struct ir3_postsched_deps_state *state, struct ir3_postsched_node *dep = dep_reg(state, reg->num + b); if (dep && (state->direction == F)) { - unsigned d = ir3_delayslots(dep->instr, node->instr, i); + unsigned d = ir3_delayslots(dep->instr, node->instr, i, true); node->delay = MAX2(node->delay, d); } } -- 2.7.4