aco: use s_bitreplicate_b64_b32 to set exec to 0xffff0000ffff0000
author: Georg Lehmann <dadschoorse@gmail.com>
Tue, 8 Aug 2023 16:48:59 +0000 (18:48 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Wed, 9 Aug 2023 20:29:01 +0000 (20:29 +0000)
Foz-DB Navi21:
Totals from 29 (0.02% of 132657) affected shaders:
Instrs: 19342 -> 19301 (-0.21%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24561>

src/amd/compiler/aco_lower_to_hw_instr.cpp

index ea43c3f..8aa1232 100644 (file)
@@ -869,10 +869,11 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
       emit_dpp_op(ctx, tmp, tmp, tmp, vtmp, reduce_op, src.size(), dpp_row_sr(8), 0xf, 0xf, false,
                   identity);
       if (ctx->program->gfx_level >= GFX10) {
-         bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
-                  Operand::c32(16u));
          if (ctx->program->wave_size == 64) {
-            bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
+            bld.sop1(aco_opcode::s_bitreplicate_b64_b32, Definition(exec, s2),
+                     Operand::c32(0xff00ff00u));
+         } else {
+            bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
                      Operand::c32(16u));
          }
          for (unsigned i = 0; i < src.size(); i++) {