aco: Don't prematurely emit s_andn2.
authorGeorg Lehmann <dadschoorse@gmail.com>
Mon, 28 Nov 2022 16:10:22 +0000 (17:10 +0100)
committerMarge Bot <emma+marge@anholt.net>
Wed, 30 Nov 2022 18:25:15 +0000 (18:25 +0000)
Split s_not + s_and allows more inverse comparision and s_cbranch_vccz
optimizations.

Foz-DB Navi21:
Totals from 516 (0.38% of 134913) affected shaders:
CodeSize: 7273724 -> 7273720 (-0.00%)
Instrs: 1364408 -> 1364407 (-0.00%)
Latency: 14604862 -> 14604858 (-0.00%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19143>

src/amd/compiler/aco_instruction_selection.cpp

index c7f58a3..0e77b85 100644 (file)
@@ -7862,9 +7862,9 @@ emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp sr
       return src;
    }
    if (op == nir_op_iand && cluster_size == 4) {
-      /* subgroupClusteredAnd(val, 4) -> ~wqm(exec & ~val) */
-      Temp tmp =
-         bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
+      /* subgroupClusteredAnd(val, 4) -> ~wqm(~val & exec) */
+      Temp tmp = bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), src);
+      tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), tmp, Operand(exec, bld.lm));
       return bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc),
                       bld.sop1(Builder::s_wqm, bld.def(bld.lm), bld.def(s1, scc), tmp));
    } else if (op == nir_op_ior && cluster_size == 4) {
@@ -7873,11 +7873,11 @@ emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp sr
          Builder::s_wqm, bld.def(bld.lm), bld.def(s1, scc),
          bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)));
    } else if (op == nir_op_iand && cluster_size == ctx->program->wave_size) {
-      /* subgroupAnd(val) -> (exec & ~val) == 0 */
-      Temp tmp =
-         bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src)
-            .def(1)
-            .getTemp();
+      /* subgroupAnd(val) -> (~val & exec) == 0 */
+      Temp tmp = bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), src);
+      tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), tmp, Operand(exec, bld.lm))
+               .def(1)
+               .getTemp();
       Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
       return bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), cond);
    } else if (op == nir_op_ior && cluster_size == ctx->program->wave_size) {
@@ -7952,16 +7952,15 @@ emit_boolean_exclusive_scan(isel_context* ctx, nir_op op, Temp src)
    Builder bld(ctx->program, ctx->block);
    assert(src.regClass() == bld.lm);
 
-   /* subgroupExclusiveAnd(val) -> mbcnt(exec & ~val) == 0
+   /* subgroupExclusiveAnd(val) -> mbcnt(~val & exec) == 0
     * subgroupExclusiveOr(val) -> mbcnt(val & exec) != 0
     * subgroupExclusiveXor(val) -> mbcnt(val & exec) & 1 != 0
     */
-   Temp tmp;
    if (op == nir_op_iand)
-      tmp =
-         bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src);
-   else
-      tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
+      src = bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), src);
+
+   Temp tmp =
+      bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
 
    Temp mbcnt = emit_mbcnt(ctx, bld.tmp(v1), Operand(tmp));
 
@@ -8708,10 +8707,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       assert(src.regClass() == bld.lm);
       assert(dst.regClass() == bld.lm);
 
-      Temp tmp =
-         bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src)
-            .def(1)
-            .getTemp();
+      Temp tmp = bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), src);
+      tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), tmp, Operand(exec, bld.lm))
+               .def(1)
+               .getTemp();
       Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
       bld.sop1(Builder::s_not, Definition(dst), bld.def(s1, scc), cond);
       break;