radv: vectorize nir_op_fabs
author Daniel Schürmann <daniel@schuermann.dev>
Thu, 7 Oct 2021 18:20:23 +0000 (20:20 +0200)
committer Marge Bot <emma+marge@anholt.net>
Mon, 27 Jun 2022 15:07:27 +0000 (15:07 +0000)
Totals from 4 (0.00% of 134913) affected shaders: (GFX10.3)
CodeSize: 37868 -> 36576 (-3.41%)
Instrs: 5332 -> 5169 (-3.06%)
Latency: 24452 -> 24174 (-1.14%)
InvThroughput: 9784 -> 9462 (-3.29%)
VClause: 54 -> 50 (-7.41%)
Copies: 520 -> 519 (-0.19%)
PreVGPRs: 266 -> 264 (-0.75%)

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15176>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/vulkan/radv_pipeline.c

index e0a8a7b..34e82a1 100644 (file)
@@ -2522,6 +2522,16 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       break;
    }
    case nir_op_fabs: {
+      if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
+         Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
+         Instruction* vop3p =
+            bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
+                      instr->src[0].swizzle[0] & 1 ? 3 : 0, instr->src[0].swizzle[1] & 1 ? 3 : 0)
+               .instr;
+         vop3p->vop3p().neg_lo[1] = true;
+         vop3p->vop3p().neg_hi[1] = true;
+         break;
+      }
       Temp src = get_alu_src(ctx, instr->src[0]);
       if (dst.regClass() == v2b) {
          Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst),
index ebe72c1..f0ef432 100644 (file)
@@ -4072,6 +4072,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
    case nir_op_ffma:
    case nir_op_fdiv:
    case nir_op_flrp:
+   case nir_op_fabs:
    case nir_op_fneg:
    case nir_op_fsat:
    case nir_op_fmin: