From 5c9495cf376507f063d8e931aab4d3ff461de75f Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Tue, 29 Aug 2023 18:17:10 -0400
Subject: [PATCH] agx: Omit while_icmp without continue

The only role of the while_icmp at the end of a NIR loop is to make continue
jumps work. If, after emitting the loop, we learn that there are no continues,
there is no need to insert a while_icmp since it would be a no-op anyway.

   total instructions in shared programs: 1764311 -> 1764199 (<.01%)
   instructions in affected programs: 26321 -> 26209 (-0.43%)
   helped: 82
   HURT: 0
   Instructions are helped.

   total bytes in shared programs: 11609978 -> 11609306 (<.01%)
   bytes in affected programs: 178842 -> 178170 (-0.38%)
   helped: 82
   HURT: 0
   Bytes are helped.

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/compiler/agx_compile.c  | 21 ++++++++++++++++++---
 src/asahi/compiler/agx_compiler.h |  3 +++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 191075d..fed9005 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1747,6 +1747,7 @@ agx_emit_jump(agx_builder *b, nir_jump_instr *instr)
    if (instr->type == nir_jump_continue) {
       nestings += 1;
       agx_block_add_successor(ctx->current_block, ctx->continue_block);
+      ctx->loop_continues = true;
    } else if (instr->type == nir_jump_break) {
       nestings += 2;
       agx_block_add_successor(ctx->current_block, ctx->break_block);
@@ -1946,6 +1947,8 @@ emit_loop(agx_context *ctx, nir_loop *nloop)
    ctx->loop_nesting = 0;
    ctx->total_nesting++;
 
+   bool old_continues = ctx->loop_continues;
+
    agx_block *popped_break = ctx->break_block;
    agx_block *popped_continue = ctx->continue_block;
 
@@ -1968,10 +1971,21 @@ emit_loop(agx_context *ctx, nir_loop *nloop)
    ctx->after_block->loop_header = true;
    agx_block *start_block = emit_cf_list(ctx, &nloop->body);
 
-   /* Fix up the nesting counter via an always true while_icmp, and branch back
-    * to start of loop if any lanes are active */
+   /* If we used any continue jumps, we need to reactivate the continued
+    * threads. We do this with an always true while_icmp, which behaves like:
+    *
+    *    if (r0l == 1) {
+    *       r0l = 0;
+    *    }
+    *    update_exec
+    *
+    * If we did not use continue, this would be a no-op so it is omitted.
+    */
    _b.cursor = agx_after_block(ctx->current_block);
-   agx_while_icmp(&_b, agx_zero(), agx_zero(), 2, AGX_ICOND_UEQ, false);
+
+   if (ctx->loop_continues)
+      agx_while_icmp(&_b, agx_zero(), agx_zero(), 2, AGX_ICOND_UEQ, false);
+
    agx_jmp_exec_any(&_b, start_block);
    agx_pop_exec(&_b, 2);
    agx_block_add_successor(ctx->current_block, ctx->continue_block);
@@ -1990,6 +2004,7 @@ emit_loop(agx_context *ctx, nir_loop *nloop)
    /* Restore loop nesting (we might be inside an if inside an outer loop) */
    ctx->loop_nesting = pushed_nesting;
    ctx->total_nesting--;
+   ctx->loop_continues = old_continues;
 }
 
 /* Before the first control flow structure, the nesting counter needs to be
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index c353a77..072d7c4 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -404,6 +404,9 @@ typedef struct {
    /* Total nesting across all loops, to determine if we need push_exec */
    unsigned total_nesting;
 
+   /* Whether loop being emitted used any `continue` jumps */
+   bool loop_continues;
+
    /* During instruction selection, for inserting control flow */
    agx_block *current_block;
    agx_block *continue_block;
-- 
2.7.4