From 11b533cb19815c037cfd973966258ed9e43100f2 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 11 Nov 2021 10:27:30 +0000
Subject: [PATCH] aco: optimize load_local_invocation_index with single-wave
 workgroups

fossil-db (Sienna Cichlid):
Totals from 668 (0.52% of 128647) affected shaders:
CodeSize: 2201912 -> 2193336 (-0.39%)
Instrs: 403124 -> 402325 (-0.20%)
Latency: 4510940 -> 4510214 (-0.02%); split: -0.02%, +0.00%
InvThroughput: 681057 -> 679453 (-0.24%); split: -0.24%, +0.00%
VClause: 6470 -> 6467 (-0.05%)
SClause: 12759 -> 12755 (-0.03%)
Copies: 26348 -> 26218 (-0.49%); split: -0.50%, +0.00%
PreSGPRs: 26140 -> 26101 (-0.15%)

Signed-off-by: Rhys Perry
Reviewed-by: Daniel-schuermann
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index baa21d6..5a23c78 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -8198,6 +8198,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       } else if (ctx->stage.hw == HWStage::GS || ctx->stage.hw == HWStage::NGG) {
          bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx));
          break;
+      } else if (ctx->program->workgroup_size <= ctx->program->wave_size) {
+         emit_mbcnt(ctx, get_ssa_temp(ctx, &instr->dest.ssa));
+         break;
       }
 
       Temp id = emit_mbcnt(ctx, bld.tmp(v1));
-- 
2.7.4