nir: add nir_intrinsic_optimization_barrier_vgpr_amd for LLVM
author: Marek Olšák <marek.olsak@amd.com>
Sat, 22 Oct 2022 00:29:31 +0000 (20:29 -0400)
committer: Marge Bot <emma+marge@anholt.net>
Sat, 29 Oct 2022 18:38:33 +0000 (18:38 +0000)
We need this for the MSAA resolve shader.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Mihai Preda <mhpreda@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19243>

src/amd/llvm/ac_nir_to_llvm.c
src/compiler/nir/nir_divergence_analysis.c
src/compiler/nir/nir_intrinsics.py

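diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 69dc14c..05a1aee 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c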
@@ -3954,6 +3954,10 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
 
       ac_build_s_barrier(&ctx->ac, ctx->stage);
       break;
+   case nir_intrinsic_optimization_barrier_vgpr_amd:
+      result = get_src(ctx, instr->src[0]);
+      ac_build_optimization_barrier(&ctx->ac, &result, false);
+      break;
    case nir_intrinsic_shared_atomic_add:
    case nir_intrinsic_shared_atomic_imin:
    case nir_intrinsic_shared_atomic_umin:
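diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index 4d8dce3..b916e63 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c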
@@ -363,6 +363,9 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
                      instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
       break;
 
+   case nir_intrinsic_optimization_barrier_vgpr_amd:
+      is_divergent = instr->src[0].ssa->divergent;
+      break;
 
    /* Intrinsics with divergence depending on sources */
    case nir_intrinsic_ballot_bitfield_extract:
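diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index cde500e..8cea094 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py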
@@ -1304,6 +1304,13 @@ store("tf_r600", [])
 
 # AMD GCN/RDNA specific intrinsics
 
+# This barrier is a scheduling hint: the instruction that computes src must
+# not be moved past (i.e. after) this barrier. It only constrains the
+# instruction scheduler; otherwise it behaves exactly like a move instruction.
+# On AMD, it also forces the src value to be stored in a VGPR.
+intrinsic("optimization_barrier_vgpr_amd", dest_comp=0, src_comp=[0],
+          flags=[CAN_ELIMINATE])
+
 # src[] = { descriptor, vector byte offset, scalar byte offset, index offset }
 # The index offset is multiplied by the stride in the descriptor. The vertex/scalar byte offsets
 # are in bytes.