nir: Add NVIDIA-specific I/O intrinsics
author: Faith Ekstrand <faith.ekstrand@collabora.com>
Sat, 30 Sep 2023 07:16:51 +0000 (02:16 -0500)
committer: Marge Bot <emma+marge@anholt.net>
Tue, 24 Oct 2023 22:21:18 +0000 (22:21 +0000)
NVIDIA hardware doesn't take a vertex index for per-vertex I/O.
Instead, it takes an offset into the primitive.  This offset has to be
fetched using a combination of SR_INVOCATION_INFO and the ISBERD
instruction.  To keep things simple and allow for maximum common
subexpression elimination (CSE), we do the lowering in NIR and patch the
load/store_per_vertex_input/output intrinsics.

Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25000>

src/compiler/nir/nir.h
src/compiler/nir/nir_divergence_analysis.c
src/compiler/nir/nir_intrinsics.py

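diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 13a935d..16be72b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h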
@@ -2085,6 +2085,7 @@ nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
               instr->intrinsic == nir_intrinsic_bindless_image_load ||
               instr->intrinsic == nir_intrinsic_image_deref_load ||
               instr->intrinsic == nir_intrinsic_image_load ||
+              instr->intrinsic == nir_intrinsic_ald_nv ||
               instr->intrinsic == nir_intrinsic_load_sysval_nv) {
       return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER;
    } else {
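diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index c6f24bb..37ce3cc 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c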
@@ -614,6 +614,9 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
    case nir_intrinsic_load_ray_triangle_vertex_positions:
    case nir_intrinsic_cmat_extract:
    case nir_intrinsic_cmat_muladd_amd:
+   case nir_intrinsic_isberd_nv:
+   case nir_intrinsic_al2p_nv:
+   case nir_intrinsic_ald_nv:
       is_divergent = true;
       break;
 
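diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index a3b570f..576defa 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py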
@@ -1958,6 +1958,19 @@ system_value("ray_query_global_intel", 1, bit_sizes=[64])
 # NVIDIA-specific intrinsics
 intrinsic("load_sysval_nv", dest_comp=1, src_comp=[], bit_sizes=[32, 64],
           indices=[ACCESS, BASE], flags=[CAN_ELIMINATE])
+intrinsic("isberd_nv", dest_comp=1, src_comp=[1], bit_sizes=[32],
+          flags=[CAN_ELIMINATE, CAN_REORDER])
+intrinsic("al2p_nv", dest_comp=1, src_comp=[1], bit_sizes=[32],
+          indices=[BASE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER])
+# src[] = { vtx, offset }.
+# FLAGS is struct nak_nir_attr_io_flags
+intrinsic("ald_nv", dest_comp=0, src_comp=[1, 1], bit_sizes=[32],
+          indices=[BASE, RANGE_BASE, RANGE, FLAGS, ACCESS],
+          flags=[CAN_ELIMINATE])
+# src[] = { data, vtx, offset }.
+# FLAGS is struct nak_nir_attr_io_flags
+intrinsic("ast_nv", src_comp=[0, 1, 1],
+          indices=[BASE, RANGE_BASE, RANGE, FLAGS], flags=[])
 
 # In order to deal with flipped render targets, gl_PointCoord may be flipped
 # in the shader requiring a shader key or extra instructions or it may be