radv,aco: Don't lower 16bit iabs; vectorize it instead.
author Georg Lehmann <dadschoorse@gmail.com>
Sat, 9 Jul 2022 11:32:28 +0000 (13:32 +0200)
committer Marge Bot <emma+marge@anholt.net>
Wed, 20 Jul 2022 14:31:15 +0000 (14:31 +0000)
Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17440>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/vulkan/radv_pipeline.c

index e3de4ef..69b82d3 100644 (file)
@@ -1495,12 +1495,31 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       break;
    }
    case nir_op_iabs: {
+      if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
+         Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
+
+         unsigned opsel_lo = (instr->src[0].swizzle[0] & 1) << 1;
+         unsigned opsel_hi = ((instr->src[0].swizzle[1] & 1) << 1) | 1;
+
+         Temp sub = bld.vop3p(aco_opcode::v_pk_sub_u16, Definition(bld.tmp(v1)), Operand::zero(),
+                              src, opsel_lo, opsel_hi);
+         bld.vop3p(aco_opcode::v_pk_max_i16, Definition(dst), sub, src, opsel_lo, opsel_hi);
+         break;
+      }
       Temp src = get_alu_src(ctx, instr->src[0]);
       if (dst.regClass() == s1) {
          bld.sop1(aco_opcode::s_abs_i32, Definition(dst), bld.def(s1, scc), src);
       } else if (dst.regClass() == v1) {
          bld.vop2(aco_opcode::v_max_i32, Definition(dst), src,
                   bld.vsub32(bld.def(v1), Operand::zero(), src));
+      } else if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX10) {
+         bld.vop3(
+            aco_opcode::v_max_i16_e64, Definition(dst), src,
+            bld.vop3(aco_opcode::v_sub_u16_e64, Definition(bld.tmp(v2b)), Operand::zero(2), src));
+      } else if (dst.regClass() == v2b) {
+         src = as_vgpr(ctx, src);
+         bld.vop2(aco_opcode::v_max_i16, Definition(dst), src,
+                  bld.vop2(aco_opcode::v_sub_u16, Definition(bld.tmp(v2b)), Operand::zero(2), src));
       } else {
          isel_err(&instr->instr, "Unimplemented NIR instr bit size");
       }
index a33776f..44cfef4 100644 (file)
@@ -556,6 +556,7 @@ init_context(isel_context* ctx, nir_shader* shader)
                case nir_op_b2f16:
                case nir_op_b2f32:
                case nir_op_mov: break;
+               case nir_op_iabs:
                case nir_op_iadd:
                case nir_op_iadd_sat:
                case nir_op_uadd_sat:
index c9a2cd8..bf7a587 100644 (file)
@@ -3947,13 +3947,13 @@ lower_bit_size_callback(const nir_instr *instr, void *_)
    if (alu->dest.dest.ssa.bit_size & (8 | 16)) {
       unsigned bit_size = alu->dest.dest.ssa.bit_size;
       switch (alu->op) {
-      case nir_op_iabs:
       case nir_op_bitfield_select:
       case nir_op_imul_high:
       case nir_op_umul_high:
       case nir_op_ineg:
       case nir_op_isign:
          return 32;
+      case nir_op_iabs:
       case nir_op_imax:
       case nir_op_umax:
       case nir_op_imin:
@@ -4026,6 +4026,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
    case nir_op_fsat:
    case nir_op_fmin:
    case nir_op_fmax:
+   case nir_op_iabs:
    case nir_op_iadd:
    case nir_op_iadd_sat:
    case nir_op_uadd_sat: