ac/nir/ngg,radv,radeonsi: nogs use ac_nir_export_(position|parameter)
authorQiang Yu <yuq825@gmail.com>
Sat, 24 Dec 2022 06:55:29 +0000 (14:55 +0800)
committerMarge Bot <emma+marge@anholt.net>
Fri, 3 Feb 2023 12:27:44 +0000 (12:27 +0000)
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20691>

src/amd/common/ac_nir.h
src/amd/common/ac_nir_lower_ngg.c
src/amd/vulkan/radv_shader.c
src/gallium/drivers/radeonsi/si_shader.c

index 81b9b02..fbdec73 100644 (file)
@@ -156,20 +156,23 @@ typedef struct {
 
    unsigned max_workgroup_size;
    unsigned wave_size;
+   uint32_t clipdist_enable_mask;
    const uint8_t *vs_output_param_offset; /* GFX11+ */
+   bool has_param_exports;
    bool can_cull;
    bool disable_streamout;
    bool has_gen_prim_query;
    bool has_xfb_prim_query;
+   bool kill_pointsize;
+   bool force_vrs;
 
    /* VS */
    unsigned num_vertices_per_primitive;
    bool early_prim_export;
    bool passthrough;
    bool use_edgeflags;
-   int primitive_id_location;
+   bool export_primitive_id;
    uint32_t instance_rate_inputs;
-   uint32_t clipdist_enable_mask;
    uint32_t user_clip_plane_enable_mask;
 
    /* GS */
index dc20f59..875a54d 100644 (file)
@@ -82,7 +82,6 @@ typedef struct
    bool streamout_enabled;
    bool has_user_edgeflags;
    unsigned max_num_waves;
-   unsigned position_store_base;
 
    /* LDS params */
    unsigned pervertex_lds_bytes;
@@ -597,14 +596,7 @@ emit_store_ngg_nogs_es_primitive_id(nir_builder *b, lower_ngg_nogs_state *st)
       prim_id = nir_load_primitive_id(b);
    }
 
-   nir_io_semantics io_sem = {
-      .location = VARYING_SLOT_PRIMITIVE_ID,
-      .num_slots = 1,
-   };
-
-   nir_store_output(b, prim_id, nir_imm_zero(b, 1, 32),
-                    .base = st->options->primitive_id_location,
-                    .src_type = nir_type_uint32, .io_semantics = io_sem);
+   st->outputs[VARYING_SLOT_PRIMITIVE_ID][0] = prim_id;
 
    /* Update outputs_written to reflect that the pass added a new output. */
    b->shader->info.outputs_written |= VARYING_BIT_PRIMITIVE_ID;
@@ -777,9 +769,6 @@ remove_extra_pos_output(nir_builder *b, nir_instr *instr, void *state)
    nir_ssa_def *store_val = intrin->src[0].ssa;
    unsigned store_pos_component = nir_intrinsic_component(intrin);
 
-   /* save the store base for re-construct store output instruction */
-   s->position_store_base = nir_intrinsic_base(intrin);
-
    nir_instr_remove(instr);
 
    if (store_val->parent_instr->type == nir_instr_type_alu) {
@@ -2105,12 +2094,8 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
             type[c] = src_type;
          }
 
-         /* remove the edge flag output anyway as it should not be passed to next stage */
-         bool is_edge_slot = slot == VARYING_SLOT_EDGE;
-         /* remove non-pos-export slot when GFX11, they are written to buffer memory */
-         bool is_pos_export_slot = slot < VARYING_SLOT_MAX && (BITFIELD64_BIT(slot) & POS_EXPORT_MASK);
-         if (is_edge_slot || (s->options->gfx_level >= GFX11 && !is_pos_export_slot))
-            nir_instr_remove(instr);
+         /* remove all store output instructions */
+         nir_instr_remove(instr);
       }
    }
 }
@@ -2260,9 +2245,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
    };
 
    const bool need_prim_id_store_shared =
-      options->primitive_id_location >= 0 && shader->info.stage == MESA_SHADER_VERTEX;
+      options->export_primitive_id && shader->info.stage == MESA_SHADER_VERTEX;
 
-   if (options->primitive_id_location >= 0) {
+   if (options->export_primitive_id) {
       nir_variable *prim_id_var = nir_variable_create(shader, nir_var_shader_out, glsl_uint_type(), "ngg_prim_id");
       prim_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
       prim_id_var->data.driver_location = VARYING_SLOT_PRIMITIVE_ID;
@@ -2326,7 +2311,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
       ngg_nogs_get_pervertex_lds_size(shader->info.stage,
                                       shader->num_outputs,
                                       state.streamout_enabled,
-                                      options->primitive_id_location >= 0,
+                                      options->export_primitive_id,
                                       state.has_user_edgeflags);
 
    if (need_prim_id_store_shared) {
@@ -2337,7 +2322,6 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
                             .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
    }
 
-   nir_intrinsic_instr *export_vertex_instr;
    nir_ssa_def *es_thread =
       options->can_cull ? nir_load_var(b, es_accepted_var) : has_input_vertex(b);
 
@@ -2357,11 +2341,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
       nir_cf_reinsert(&extracted, b->cursor);
       b->cursor = nir_after_cf_list(&if_es_thread->then_list);
 
-      if (options->primitive_id_location >= 0)
+      if (options->export_primitive_id)
          emit_store_ngg_nogs_es_primitive_id(b, &state);
-
-      /* Export all vertex attributes (including the primitive ID) */
-      export_vertex_instr = nir_export_vertex_amd(b);
    }
    nir_pop_if(b, if_es_thread);
 
@@ -2376,12 +2357,11 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
        * it seems that it's best to put the position export always at the end, and
        * then let ACO schedule it up (slightly) only when early prim export is used.
        */
-      b->cursor = nir_before_instr(&export_vertex_instr->instr);
+      b->cursor = nir_after_cf_list(&if_es_thread->then_list);
 
       nir_ssa_def *pos_val = nir_load_var(b, state.position_value_var);
-      nir_io_semantics io_sem = { .location = VARYING_SLOT_POS, .num_slots = 1 };
-      nir_store_output(b, pos_val, nir_imm_int(b, 0), .base = state.position_store_base,
-                       .component = 0, .io_semantics = io_sem, .src_type = nir_type_float32);
+      for (int i = 0; i < 4; i++)
+         state.outputs[VARYING_SLOT_POS][i] = nir_channel(b, pos_val, i);
    }
 
    /* Gather outputs data and types */
@@ -2407,23 +2387,40 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
       emit_ngg_nogs_prim_export(b, &state, nir_load_var(b, prim_exp_arg_var));
    }
 
-   /* Export varyings for GFX11+ */
-   if (state.options->gfx_level >= GFX11) {
-      vs_output outputs[64];
-
-      b->cursor = nir_after_cf_list(&if_es_thread->then_list);
-      unsigned num_outputs = gather_vs_outputs(b, outputs, &state);
-
-      if (num_outputs) {
-         b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
-         create_vertex_param_phis(b, num_outputs, outputs);
-
-         b->cursor = nir_after_cf_list(&impl->body);
+   uint64_t export_outputs = shader->info.outputs_written;
+   if (options->kill_pointsize)
+      export_outputs &= ~VARYING_BIT_PSIZ;
 
-         if (!num_es_threads)
-            num_es_threads = nir_load_merged_wave_info_amd(b);
-         export_vertex_params_gfx11(b, NULL, num_es_threads, num_outputs, outputs,
-                                    options->vs_output_param_offset);
+   b->cursor = nir_after_cf_list(&if_es_thread->then_list);
+   ac_nir_export_position(b, options->gfx_level,
+                          options->clipdist_enable_mask,
+                          !options->has_param_exports,
+                          options->force_vrs,
+                          export_outputs, state.outputs);
+
+   if (options->has_param_exports) {
+      if (state.options->gfx_level >= GFX11) {
+         /* Export varyings for GFX11+ */
+         vs_output outputs[64];
+         unsigned num_outputs = gather_vs_outputs(b, outputs, &state);
+
+         if (num_outputs) {
+            b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
+            create_vertex_param_phis(b, num_outputs, outputs);
+
+            b->cursor = nir_after_cf_list(&impl->body);
+
+            if (!num_es_threads)
+               num_es_threads = nir_load_merged_wave_info_amd(b);
+            export_vertex_params_gfx11(b, NULL, num_es_threads, num_outputs, outputs,
+                                       options->vs_output_param_offset);
+         }
+      } else {
+         ac_nir_export_parameter(b, options->vs_output_param_offset,
+                                 shader->info.outputs_written,
+                                 shader->info.outputs_written_16bit,
+                                 state.outputs, state.outputs_16bit_lo,
+                                 state.outputs_16bit_hi);
       }
    }
 
index 6ca949f..b05fa2e 100644 (file)
@@ -1444,7 +1444,9 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
    options.gfx_level = device->physical_device->rad_info.gfx_level;
    options.max_workgroup_size = info->workgroup_size;
    options.wave_size = info->wave_size;
+   options.clipdist_enable_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;
    options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
+   options.has_param_exports = info->outinfo.param_exports;
    options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
    options.disable_streamout = !device->physical_device->use_ngg_streamout;
    options.has_gen_prim_query = info->has_ngg_prim_query;
@@ -1460,7 +1462,7 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
       options.num_vertices_per_primitive = num_vertices_per_prim;
       options.early_prim_export = info->has_ngg_early_prim_export;
       options.passthrough = info->is_ngg_passthrough;
-      options.primitive_id_location = info->outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1;
+      options.export_primitive_id = info->outinfo.export_prim_id;
       options.instance_rate_inputs = pl_key->vs.instance_rate_inputs << VERT_ATTRIB_GENERIC0;
 
       NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);
index 0c6e615..2d7d9cb 100644 (file)
@@ -1604,6 +1604,10 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
    const union si_shader_key *key = &shader->key;
    assert(key->ge.as_ngg);
 
+   unsigned clipdist_mask =
+      (sel->info.clipdist_mask & ~key->ge.opt.kill_clip_distances) |
+      sel->info.culldist_mask;
+
    ac_nir_lower_ngg_options options = {
       .family = sel->screen->info.family,
       .gfx_level = sel->screen->info.gfx_level,
@@ -1612,6 +1616,10 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
       .can_cull = !!key->ge.opt.ngg_culling,
       .disable_streamout = key->ge.opt.remove_streamout,
       .vs_output_param_offset = shader->info.vs_output_param_offset,
+      .has_param_exports = shader->info.nr_param_exports,
+      .clipdist_enable_mask = clipdist_mask,
+      .kill_pointsize = key->ge.opt.kill_pointsize,
+      .force_vrs = sel->screen->options.vrs2x2,
    };
 
    if (nir->info.stage == MESA_SHADER_VERTEX ||
@@ -1635,8 +1643,6 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
 
       unsigned clip_plane_enable =
          SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(key->ge.opt.ngg_culling);
-      unsigned clipdist_mask =
-         (sel->info.clipdist_mask & clip_plane_enable) | sel->info.culldist_mask;
 
       options.num_vertices_per_primitive = gfx10_ngg_get_vertices_per_prim(shader);
       options.early_prim_export = gfx10_ngg_export_prim_early(shader);
@@ -1644,10 +1650,8 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
       options.use_edgeflags = gfx10_edgeflags_have_effect(shader);
       options.has_gen_prim_query = options.has_xfb_prim_query =
          sel->screen->use_ngg_streamout && !sel->info.base.vs.blit_sgprs_amd;
-      options.primitive_id_location =
-         key->ge.mono.u.vs_export_prim_id ? sel->info.num_outputs : -1;
+      options.export_primitive_id = key->ge.mono.u.vs_export_prim_id;
       options.instance_rate_inputs = instance_rate_inputs;
-      options.clipdist_enable_mask = clipdist_mask;
       options.user_clip_plane_enable_mask = clip_plane_enable;
 
       NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);