From 5bc100eb2def6fa1ae9b85dc81e9ebc672c32ec5 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Mon, 15 Mar 2021 13:35:54 +0000
Subject: [PATCH] aco: use a single instruction for uadd32_sat() on GFX8
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

fossil-db (GFX8):
Totals from 8 (0.01% of 147787) affected shaders:
SGPRs: 352 -> 368 (+4.55%)
CodeSize: 49576 -> 48788 (-1.59%)
Instrs: 9487 -> 9318 (-1.78%)
Latency: 49935 -> 49607 (-0.66%)
InvThroughput: 138493 -> 137443 (-0.76%)

Signed-off-by: Rhys Perry
Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index cbce772..7b95984 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1230,13 +1230,18 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
 
 Temp uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
 {
+   if (bld.program->chip_class < GFX8) {
+      Builder::Result add = bld.vadd32(bld.def(v1), src0, src1, true);
+      return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand((uint32_t) -1), add.def(1).getTemp());
+   }
+
+   Builder::Result add(NULL);
    if (bld.program->chip_class >= GFX9) {
-      Builder::Result add = bld.vop2_e64(aco_opcode::v_add_u32, dst, src0, src1);
-      add.instr->vop3().clamp = 1;
+      add = bld.vop2_e64(aco_opcode::v_add_u32, dst, src0, src1);
    } else {
-      Builder::Result add = bld.vadd32(bld.def(v1), src0, src1, true);
-      bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand((uint32_t) -1), add.def(1).getTemp());
+      add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.hint_vcc(bld.def(bld.lm)), src0, src1);
    }
+   add.instr->vop3().clamp = 1;
    return dst.getTemp();
 }
 
-- 
2.7.4