aco: Implement byte and lane permute intrinsics.
author Timur Kristóf <timur.kristof@gmail.com>
Fri, 28 May 2021 19:57:43 +0000 (21:57 +0200)
committer Marge Bot <eric+marge@anholt.net>
Wed, 9 Jun 2021 16:48:51 +0000 (16:48 +0000)
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11072>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp

index 40a1687..c06bdb1 100644 (file)
@@ -8387,6 +8387,31 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
       emit_wqm(bld, wqm_tmp, dst);
       break;
    }
+   case nir_intrinsic_byte_permute_amd: {
+      Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
+      assert(dst.regClass() == v1);
+      assert(ctx->program->chip_class >= GFX8);
+      bld.vop3(aco_opcode::v_perm_b32, Definition(dst),
+               get_ssa_temp(ctx, instr->src[0].ssa),
+               as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa)),
+               as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa)));
+      break;
+   }
+   case nir_intrinsic_lane_permute_16_amd: {
+      Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
+      Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
+      assert(ctx->program->chip_class >= GFX10);
+
+      if (src.regClass() == s1) {
+         bld.copy(Definition(dst), src);
+      } else if (dst.regClass() == v1 && src.regClass() == v1) {
+         bld.vop3(aco_opcode::v_permlane16_b32, Definition(dst), src,
+                  bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa)), bld.as_uniform(get_ssa_temp(ctx, instr->src[2].ssa)));
+      } else {
+         isel_err(&instr->instr, "Unimplemented lane_permute_16_amd");
+      }
+      break;
+   }
    case nir_intrinsic_load_helper_invocation:
    case nir_intrinsic_is_helper_invocation: {
       /* load_helper() after demote() get lowered to is_helper().
index f7cebe0..7141bbf 100644 (file)
@@ -772,6 +772,8 @@ void init_context(isel_context *ctx, nir_shader *shader)
                   case nir_intrinsic_load_tess_coord:
                   case nir_intrinsic_write_invocation_amd:
                   case nir_intrinsic_mbcnt_amd:
+                  case nir_intrinsic_byte_permute_amd:
+                  case nir_intrinsic_lane_permute_16_amd:
                   case nir_intrinsic_load_instance_id:
                   case nir_intrinsic_ssbo_atomic_add:
                   case nir_intrinsic_ssbo_atomic_imin: