radv: move emitting PRIMGROUP_SIZE for <= GFX9 from the cmdbuf
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 29 Aug 2022 16:27:52 +0000 (18:27 +0200)
committerMarge Bot <emma+marge@anholt.net>
Tue, 13 Sep 2022 08:24:14 +0000 (08:24 +0000)
The number of tessellation patches that is computed from the number
of patch control points might change dynamically too.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18344>

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/si_cmd_buffer.c

index 86c82ff..852ceb9 100644 (file)
@@ -3984,12 +3984,18 @@ si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dr
    bool prim_restart_enable = state->dynamic.primitive_restart_enable;
    unsigned patch_control_points = state->graphics_pipeline->tess_patch_control_points;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   unsigned num_tess_patches = 0;
    unsigned ia_multi_vgt_param;
 
+   if (radv_pipeline_has_stage(state->graphics_pipeline, MESA_SHADER_TESS_CTRL)) {
+      struct radv_shader *tcs = state->graphics_pipeline->base.shaders[MESA_SHADER_TESS_CTRL];
+      num_tess_patches = tcs->info.num_tess_patches;
+   }
+
    ia_multi_vgt_param =
       si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output,
                                 draw_vertex_count, topology, prim_restart_enable,
-                                patch_control_points);
+                                patch_control_points, num_tess_patches);
 
    if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
       if (info->gfx_level == GFX9) {
index f6d7ac6..4949e69 100644 (file)
@@ -1414,20 +1414,6 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_graphics_pipeline *pipeline)
    const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
    struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
 
-   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
-      ia_multi_vgt_param.primgroup_size =
-         pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
-   else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
-      ia_multi_vgt_param.primgroup_size = 64;
-   else
-      ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
-
-   /* GS requirement. */
-   ia_multi_vgt_param.partial_es_wave = false;
-   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && pdevice->rad_info.gfx_level <= GFX8)
-      if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pdevice->gs_table_depth - 3)
-         ia_multi_vgt_param.partial_es_wave = true;
-
    ia_multi_vgt_param.ia_switch_on_eoi = false;
    if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
       ia_multi_vgt_param.ia_switch_on_eoi = true;
@@ -1480,7 +1466,6 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_graphics_pipeline *pipeline)
    }
 
    ia_multi_vgt_param.base =
-      S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
       /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
       S_028AA8_MAX_PRIMGRP_IN_WAVE(pdevice->rad_info.gfx_level == GFX8 ? 2 : 0) |
       S_030960_EN_INST_OPT_BASIC(pdevice->rad_info.gfx_level >= GFX9) |
index d93c17d..5c410c8 100644 (file)
@@ -1673,7 +1673,8 @@ void si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *v
 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
                                    bool indirect_draw, bool count_from_stream_output,
                                    uint32_t draw_vertex_count, unsigned topology,
-                                   bool prim_restart_enable, unsigned patch_control_points);
+                                   bool prim_restart_enable, unsigned patch_control_points,
+                                   unsigned num_tess_patches);
 void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec,
                                 unsigned event, unsigned event_flags, unsigned dst_sel,
                                 unsigned data_sel, uint64_t va, uint32_t new_fence,
@@ -1903,7 +1904,6 @@ struct radv_prim_vertex_count {
 struct radv_ia_multi_vgt_param_helpers {
    uint32_t base;
    bool partial_es_wave;
-   uint8_t primgroup_size;
    bool ia_switch_on_eoi;
    bool partial_vs_wave;
 };
index 795a161..70aa5a0 100644 (file)
@@ -816,7 +816,7 @@ uint32_t
 si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
                           bool indirect_draw, bool count_from_stream_output,
                           uint32_t draw_vertex_count, unsigned topology, bool prim_restart_enable,
-                          unsigned patch_control_points)
+                          unsigned patch_control_points, unsigned num_tess_patches)
 {
    enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
    enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
@@ -830,6 +830,23 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
    bool partial_es_wave = cmd_buffer->state.graphics_pipeline->ia_multi_vgt_param.partial_es_wave;
    bool multi_instances_smaller_than_primgroup;
    struct radv_prim_vertex_count prim_vertex_count = prim_size_table[topology];
+   unsigned primgroup_size;
+
+   if (radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_TESS_CTRL)) {
+      primgroup_size = num_tess_patches;
+   } else if (radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_GEOMETRY)) {
+      primgroup_size = 64;
+   } else {
+      primgroup_size = 128; /* recommended without a GS */
+   }
+
+   /* GS requirement. */
+   if (radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_GEOMETRY) &&
+       gfx_level <= GFX8) {
+      unsigned gs_table_depth = cmd_buffer->device->physical_device->gs_table_depth;
+      if (SI_GS_PER_ES / primgroup_size >= gs_table_depth - 3)
+         partial_es_wave = true;
+   }
 
    if (radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_TESS_CTRL)) {
       if (topology == V_008958_DI_PT_PATCH) {
@@ -841,7 +858,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
    multi_instances_smaller_than_primgroup = indirect_draw;
    if (!multi_instances_smaller_than_primgroup && instanced_draw) {
       uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
-      if (num_prims < cmd_buffer->state.graphics_pipeline->ia_multi_vgt_param.primgroup_size)
+      if (num_prims < primgroup_size)
          multi_instances_smaller_than_primgroup = true;
    }
 
@@ -930,6 +947,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dra
    }
 
    return cmd_buffer->state.graphics_pipeline->ia_multi_vgt_param.base |
+          S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
           S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
           S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
           S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |