aco/gfx10+: use v_cndmask with literal for reduction identity
author Georg Lehmann <dadschoorse@gmail.com>
Fri, 16 Jun 2023 13:07:52 +0000 (15:07 +0200)
committer Marge Bot <emma+marge@anholt.net>
Tue, 20 Jun 2023 14:48:18 +0000 (14:48 +0000)
Totals from 10 (0.01% of 132657) affected shaders:
CodeSize: 171576 -> 171288 (-0.17%)
Instrs: 32127 -> 32055 (-0.22%)
Latency: 219145 -> 219027 (-0.05%)
InvThroughput: 130287 -> 130041 (-0.19%)

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23695>

src/amd/compiler/aco_lower_to_hw_instr.cpp

index f79f96a..f35991e 100644 (file)
@@ -488,19 +488,20 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
    bld.sop1(Builder::s_or_saveexec, Definition(stmp, bld.lm), Definition(scc, s1),
             Definition(exec, bld.lm), Operand::c64(UINT64_MAX), Operand(exec, bld.lm));
 
-   for (unsigned i = 0; i < src.size(); i++) {
-      /* p_exclusive_scan needs it to be a sgpr or inline constant for the v_writelane_b32
-       * except on GFX10, where v_writelane_b32 can take a literal. */
-      if (identity[i].isLiteral() && op == aco_opcode::p_exclusive_scan &&
-          ctx->program->gfx_level < GFX10) {
-         bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg{sitmp + i}, s1), identity[i]);
-         identity[i] = Operand(PhysReg{sitmp + i}, s1);
-
-         bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{tmp + i}, v1), identity[i]);
-         vcndmask_identity[i] = Operand(PhysReg{tmp + i}, v1);
-      } else if (identity[i].isLiteral()) {
-         bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{tmp + i}, v1), identity[i]);
-         vcndmask_identity[i] = Operand(PhysReg{tmp + i}, v1);
+   /* On GFX10+ v_writelane_b32/v_cndmask_b32_e64 can take a literal */
+   if (ctx->program->gfx_level < GFX10) {
+      for (unsigned i = 0; i < src.size(); i++) {
+         /* p_exclusive_scan uses identity for v_writelane_b32 */
+         if (identity[i].isLiteral() && op == aco_opcode::p_exclusive_scan) {
+            bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg{sitmp + i}, s1), identity[i]);
+            identity[i] = Operand(PhysReg{sitmp + i}, s1);
+
+            bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{tmp + i}, v1), identity[i]);
+            vcndmask_identity[i] = Operand(PhysReg{tmp + i}, v1);
+         } else if (identity[i].isLiteral()) {
+            bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{tmp + i}, v1), identity[i]);
+            vcndmask_identity[i] = Operand(PhysReg{tmp + i}, v1);
+         }
       }
    }