aco: apply NUW to additions for scratch access
authorRhys Perry <pendingchaos02@gmail.com>
Thu, 1 Dec 2022 18:04:49 +0000 (18:04 +0000)
committerMarge Bot <emma+marge@anholt.net>
Tue, 6 Dec 2022 15:23:38 +0000 (15:23 +0000)
fossil-db (navi21):
Totals from 52 (0.04% of 135636) affected shaders:
Instrs: 79036 -> 78567 (-0.59%)
CodeSize: 431188 -> 427984 (-0.74%)
Latency: 1318142 -> 1313821 (-0.33%)
InvThroughput: 293842 -> 292836 (-0.34%)
VClause: 2555 -> 2361 (-7.59%); split: -8.06%, +0.47%
Copies: 8746 -> 8767 (+0.24%); split: -0.11%, +0.35%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20117>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp

index 973399b..d52e703 100644 (file)
@@ -1873,7 +1873,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       Temp src0 = get_alu_src(ctx, instr->src[0]);
       Temp src1 = get_alu_src(ctx, instr->src[1]);
       if (dst.type() == RegType::vgpr && dst.bytes() <= 4) {
-         bld.vadd32(Definition(dst), Operand(src0), Operand(src1));
+         if (instr->no_unsigned_wrap)
+            bld.nuw().vadd32(Definition(dst), Operand(src0), Operand(src1));
+         else
+            bld.vadd32(Definition(dst), Operand(src0), Operand(src1));
          break;
       }
 
index c7356e4..12a6e55 100644 (file)
@@ -231,6 +231,8 @@ apply_nuw_to_offsets(isel_context* ctx, nir_function_impl* impl)
             if (!nir_src_is_divergent(intrin->src[2]))
                apply_nuw_to_ssa(ctx, intrin->src[2].ssa);
             break;
+         case nir_intrinsic_load_scratch: apply_nuw_to_ssa(ctx, intrin->src[0].ssa); break;
+         case nir_intrinsic_store_scratch: apply_nuw_to_ssa(ctx, intrin->src[1].ssa); break;
          default: break;
          }
       }