nir,amd: add nir_intrinsic_store_[scalar|vector]_arg_amd to overwrite inputs
author Daniel Schürmann <daniel@schuermann.dev>
Fri, 13 May 2022 10:49:40 +0000 (12:49 +0200)
committer Marge Bot <emma+marge@anholt.net>
Thu, 8 Jun 2023 00:37:03 +0000 (00:37 +0000)
These intrinsics must only be used at top-level control flow (CF)
in order to not break SSA properties.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22096>

src/amd/common/ac_nir.c
src/amd/common/ac_nir.h
src/amd/compiler/aco_instruction_selection.cpp
src/compiler/nir/nir_intrinsics.py

index d062078..cd121e4 100644 (file)
@@ -23,6 +23,18 @@ ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
       return nir_load_vector_arg_amd(b, num_components, .base = arg_index);
 }
 
+void
+ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
+                 nir_ssa_def *val)
+{
+   assert(nir_cursor_current_block(b->cursor)->cf_node.parent->type == nir_cf_node_function);
+
+   if (ac_args->args[arg.arg_index].file == AC_ARG_SGPR)
+      nir_store_scalar_arg_amd(b, val, .base = arg.arg_index);
+   else
+      nir_store_vector_arg_amd(b, val, .base = arg.arg_index);
+}
+
 nir_ssa_def *
 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
                   unsigned rshift, unsigned bitwidth)
index f646fa2..f42761a 100644 (file)
@@ -58,6 +58,9 @@ ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_
    return ac_nir_load_arg_at_offset(b, ac_args, arg, 0);
 }
 
+void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
+                      nir_ssa_def *val);
+
 nir_ssa_def *
 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
                   unsigned rshift, unsigned bitwidth);
index 3f01bfe..a2bb460 100644 (file)
@@ -9224,6 +9224,16 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
                Operand::c32(aco_symbol_lds_ngg_gs_out_vertex_base));
       break;
    }
+   case nir_intrinsic_store_scalar_arg_amd: {
+      ctx->arg_temps[nir_intrinsic_base(instr)] =
+         bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
+      break;
+   }
+   case nir_intrinsic_store_vector_arg_amd: {
+      ctx->arg_temps[nir_intrinsic_base(instr)] =
+         as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
+      break;
+   }
    default:
       isel_err(&instr->instr, "Unimplemented intrinsic instr");
       abort();
index 0e798c0..3e34a49 100644 (file)
@@ -1525,6 +1525,8 @@ intrinsic("load_force_vrs_rates_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_EL
 
 intrinsic("load_scalar_arg_amd", dest_comp=0, bit_sizes=[32], indices=[BASE, ARG_UPPER_BOUND_U32_AMD], flags=[CAN_ELIMINATE, CAN_REORDER])
 intrinsic("load_vector_arg_amd", dest_comp=0, bit_sizes=[32], indices=[BASE, ARG_UPPER_BOUND_U32_AMD], flags=[CAN_ELIMINATE, CAN_REORDER])
+store("scalar_arg_amd", [], [BASE])
+store("vector_arg_amd", [], [BASE])
 
 # src[] = { 32/64-bit base address, 32-bit offset }.
 intrinsic("load_smem_amd", src_comp=[1, 1], dest_comp=0, bit_sizes=[32],