radv: Remove first_task and ib_addr/ib_stride.
author Timur Kristóf <timur.kristof@gmail.com>
Mon, 27 Mar 2023 14:23:53 +0000 (16:23 +0200)
committer Marge Bot <emma+marge@anholt.net>
Wed, 29 Mar 2023 15:08:55 +0000 (15:08 +0000)
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22139>

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_nir_lower_abi.c
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader_args.c
src/amd/vulkan/radv_shader_args.h
src/amd/vulkan/radv_shader_info.c

index 4a22ed4..56ccd24 100644 (file)
@@ -7896,9 +7896,8 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
    cmd_buffer->state.last_drawid = -1;
    cmd_buffer->state.last_vertex_offset = -1;
 
-   /* Note: firstTask/firstVertex is not supported by this draw packet. */
-   uint32_t xyz_dim_reg = (base_reg + 4 - SI_SH_REG_OFFSET) >> 2;
-   uint32_t draw_id_reg = (base_reg + 16 - SI_SH_REG_OFFSET) >> 2;
+   uint32_t xyz_dim_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
+   uint32_t draw_id_reg = (base_reg + 12 - SI_SH_REG_OFFSET) >> 2;
 
    uint32_t draw_id_enable = !!cmd_buffer->state.graphics_pipeline->uses_drawid;
    uint32_t xyz_dim_enable = 1; /* TODO: disable XYZ_DIM when unneeded */
@@ -8014,7 +8013,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer)
    assert(ring_entry_loc->sgpr_idx != -1);
 
    uint32_t base_reg = cmd_buffer->state.graphics_pipeline->vtx_base_sgpr;
-   uint32_t xyz_dim_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;
+   uint32_t xyz_dim_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
    uint32_t ring_entry_reg = ((base_reg + ring_entry_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2;
    uint32_t xyz_dim_en = 1; /* TODO: disable XYZ_DIM when unneeded */
    uint32_t mode1_en = 1;   /* legacy fast launch mode */
@@ -8080,15 +8079,13 @@ radv_emit_userdata_vertex_drawid(struct radv_cmd_buffer *cmd_buffer, uint32_t ve
 
 ALWAYS_INLINE static void
 radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer,
-                        const uint32_t x, const uint32_t y, const uint32_t z,
-                        const uint32_t first_task)
+                        const uint32_t x, const uint32_t y, const uint32_t z)
 {
    struct radv_cmd_state *state = &cmd_buffer->state;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
    const bool uses_drawid = state->graphics_pipeline->uses_drawid;
 
    radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, state->graphics_pipeline->vtx_emit_num);
-   radeon_emit(cs, first_task);
    radeon_emit(cs, x);
    radeon_emit(cs, y);
    radeon_emit(cs, z);
@@ -8100,16 +8097,13 @@ radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer,
 }
 
 ALWAYS_INLINE static void
-radv_emit_userdata_mesh_first_task_0_draw_id_0(struct radv_cmd_buffer *cmd_buffer)
+radv_emit_userdata_mesh_draw_id_0(struct radv_cmd_buffer *cmd_buffer)
 {
    struct radv_cmd_state *state = &cmd_buffer->state;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
    struct radv_graphics_pipeline *pipeline = state->graphics_pipeline;
    const bool uses_drawid = pipeline->uses_drawid;
 
-   radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr, 1);
-   radeon_emit(cs, 0);
-
    if (uses_drawid) {
       radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr + (pipeline->vtx_emit_num - 1) * 4, 1);
       radeon_emit(cs, 0);
@@ -8117,28 +8111,8 @@ radv_emit_userdata_mesh_first_task_0_draw_id_0(struct radv_cmd_buffer *cmd_buffe
 }
 
 ALWAYS_INLINE static void
-radv_emit_userdata_task_ib_only(struct radv_cmd_buffer *cmd_buffer, uint64_t ib_va,
-                                uint32_t ib_stride)
-{
-   struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
-   struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
-
-   const struct radv_userdata_info *task_ib_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_IB);
-
-   if (task_ib_loc->sgpr_idx != -1) {
-      assert(task_ib_loc->num_sgprs == 3);
-      unsigned task_ib_reg = R_00B900_COMPUTE_USER_DATA_0 + task_ib_loc->sgpr_idx * 4;
-
-      radeon_set_sh_reg_seq(cs, task_ib_reg, 3);
-      radeon_emit(cs, ib_va);
-      radeon_emit(cs, ib_va >> 32);
-      radeon_emit(cs, ib_stride);
-   }
-}
-
-ALWAYS_INLINE static void
 radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z,
-                        uint32_t draw_id, uint32_t first_task, uint64_t ib_va)
+                        uint32_t draw_id)
 {
    struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
    struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
@@ -8164,8 +8138,6 @@ radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t
       radeon_set_sh_reg_seq(cs, draw_id_reg, 1);
       radeon_emit(cs, draw_id);
    }
-
-   radv_emit_userdata_task_ib_only(cmd_buffer, ib_va, first_task ? 8 : 0);
 }
 
 /* Bind an internal index buffer for GPUs that hang with 0-sized index buffers to handle robustness2
@@ -8361,13 +8333,12 @@ radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct r
 
 ALWAYS_INLINE static void
 radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer,
-                                  uint32_t x, uint32_t y, uint32_t z,
-                                  uint32_t first_task)
+                                  uint32_t x, uint32_t y, uint32_t z)
 {
    const uint32_t view_mask = cmd_buffer->state.render.view_mask;
    const uint32_t count = x * y * z;
 
-   radv_emit_userdata_mesh(cmd_buffer, x, y, z, first_task);
+   radv_emit_userdata_mesh(cmd_buffer, x, y, z);
 
    if (!view_mask) {
       radv_cs_emit_draw_packet(cmd_buffer, count, 0);
@@ -8404,7 +8375,7 @@ radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
    radeon_emit(cs, va);
    radeon_emit(cs, va >> 32);
 
-   radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
+   radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
 
    if (!state->render.view_mask) {
       radv_cs_emit_indirect_mesh_draw_packet(cmd_buffer, info->count, count_va, info->stride);
@@ -8418,23 +8389,14 @@ radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
 
 ALWAYS_INLINE static void
 radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
-                                       uint32_t z, uint32_t first_task)
+                                       uint32_t z)
 {
-   uint64_t fake_ib_va = 0;
    const uint32_t view_mask = cmd_buffer->state.render.view_mask;
    const unsigned num_views = MAX2(1, util_bitcount(view_mask));
    unsigned ace_predication_size = num_views * 6; /* DISPATCH_TASKMESH_DIRECT_ACE size */
 
-   if (first_task) {
-      /* Pass this as the IB to the shader for emulating firstTask in task shaders. */
-      uint32_t fake_ib_dwords[2] = {x, first_task};
-      unsigned fake_ib_offset;
-      radv_cmd_buffer_upload_data(cmd_buffer, 8, fake_ib_dwords, &fake_ib_offset);
-      fake_ib_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + fake_ib_offset;
-   }
-
-   radv_emit_userdata_task(cmd_buffer, x, y, z, 0, first_task, fake_ib_va);
-   radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
+   radv_emit_userdata_task(cmd_buffer, x, y, z, 0);
+   radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
    radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs,
                                     cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
                                     ace_predication_size);
@@ -8453,8 +8415,7 @@ radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint3
 
 static void
 radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
-                                         const struct radv_draw_info *info, uint64_t nv_ib_va,
-                                         uint32_t nv_ib_stride)
+                                         const struct radv_draw_info *info)
 {
    const uint32_t view_mask = cmd_buffer->state.render.view_mask;
    struct radeon_winsys *ws = cmd_buffer->device->ws;
@@ -8510,8 +8471,7 @@ radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
    }
 
    radv_cs_add_buffer(ws, cmd_buffer->ace_internal.cs, info->indirect->bo);
-   radv_emit_userdata_task_ib_only(cmd_buffer, nv_ib_va, nv_ib_stride);
-   radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
+   radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
    radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs,
                                     cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
                                     ace_predication_size);
@@ -9274,9 +9234,9 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y,
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
-      radv_emit_direct_taskmesh_draw_packets(cmd_buffer, x, y, z, 0);
+      radv_emit_direct_taskmesh_draw_packets(cmd_buffer, x, y, z);
    } else {
-      radv_emit_direct_mesh_draw_packet(cmd_buffer, x, y, z, 0);
+      radv_emit_direct_mesh_draw_packet(cmd_buffer, x, y, z);
    }
 
    radv_after_draw(cmd_buffer);
@@ -9307,7 +9267,7 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
-      radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, 0, 0);
+      radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info);
    } else {
       radv_emit_indirect_mesh_draw_packets(cmd_buffer, &info);
    }
@@ -9342,7 +9302,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
       return;
 
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
-      radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, 0, 0);
+      radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info);
    } else {
       radv_emit_indirect_mesh_draw_packets(cmd_buffer, &info);
    }
index 9795145..95a79c0 100644 (file)
@@ -273,10 +273,10 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
       replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
       break;
    case nir_intrinsic_load_task_ib_addr:
-      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr);
+      replacement = nir_imm_zero(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
       break;
    case nir_intrinsic_load_task_ib_stride:
-      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride);
+      replacement = nir_imm_zero(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
       break;
    case nir_intrinsic_load_lshs_vertex_stride_amd: {
       unsigned io_num = stage == MESA_SHADER_VERTEX ?
index 58e6d1f..c54655f 100644 (file)
@@ -594,39 +594,6 @@ radv_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage *fs_s
    return progress;
 }
 
-/* Emulates NV_mesh_shader first_task using first_vertex. */
-static bool
-radv_lower_ms_workgroup_id(nir_shader *nir)
-{
-   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
-   bool progress = false;
-   nir_builder b;
-   nir_builder_init(&b, impl);
-
-   nir_foreach_block(block, impl) {
-      nir_foreach_instr_safe(instr, block) {
-         if (instr->type != nir_instr_type_intrinsic)
-            continue;
-
-         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-         if (intrin->intrinsic != nir_intrinsic_load_workgroup_id)
-            continue;
-
-         progress = true;
-         b.cursor = nir_after_instr(instr);
-         nir_ssa_def *x = nir_channel(&b, &intrin->dest.ssa, 0);
-         nir_ssa_def *x_full = nir_iadd(&b, x, nir_load_first_vertex(&b));
-         nir_ssa_def *v = nir_vector_insert_imm(&b, &intrin->dest.ssa, x_full, 0);
-         nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, v, v->parent_instr);
-      }
-   }
-
-   nir_metadata preserved =
-      progress ? (nir_metadata_block_index | nir_metadata_dominance) : nir_metadata_all;
-   nir_metadata_preserve(impl, preserved);
-   return progress;
-}
-
 static bool
 is_sincos(const nir_instr *instr, const void *_)
 {
@@ -889,9 +856,6 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
    NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
 
    if (nir->info.stage == MESA_SHADER_MESH) {
-      /* NV_mesh_shader: include first_task (aka. first_vertex) in workgroup ID. */
-      NIR_PASS(_, nir, radv_lower_ms_workgroup_id);
-
       /* Mesh shaders only have a 1D "vertex index" which we use
        * as "workgroup index" to emulate the 3D workgroup ID.
        */
@@ -1204,7 +1168,6 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
                  device->physical_device->rad_info.gfx_level, false);
       return true;
    } else if (nir->info.stage == MESA_SHADER_TASK) {
-      ac_nir_apply_first_task_to_task_shader(nir);
       ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
                                        device->physical_device->task_info.num_entries);
       return true;
index 4f414cc..8091b25 100644 (file)
@@ -108,7 +108,7 @@ count_tes_user_sgprs(const struct radv_pipeline_key *key)
 static uint8_t
 count_ms_user_sgprs(const struct radv_shader_info *info)
 {
-   uint8_t count = 1 + 3; /* firstTask + num_work_groups[3] */
+   uint8_t count = 3; /* num_work_groups[3] */
 
    if (info->vs.needs_draw_id)
       count++;
@@ -415,7 +415,6 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
 static void
 declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
 {
-   ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
    ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
    if (info->vs.needs_draw_id) {
       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
@@ -553,7 +552,7 @@ static void
 set_ms_input_locs(struct radv_shader_args *args, uint8_t *user_sgpr_idx)
 {
    unsigned vs_num =
-      args->ac.base_vertex.used + 3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
+      3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
    set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
 
    if (args->ac.task_ring_entry.used)
@@ -664,8 +663,6 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip
 
       if (stage == MESA_SHADER_TASK) {
          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
-         ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr);
-         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride);
       }
 
       for (int i = 0; i < 3; i++) {
@@ -947,10 +944,6 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip
       if (args->ac.task_ring_entry.used) {
          set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1);
       }
-      if (args->task_ib_addr.used) {
-         assert(args->task_ib_stride.used);
-         set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3);
-      }
       break;
    case MESA_SHADER_VERTEX:
       if (args->ac.view_index.used)
index edb74d3..6efbfae 100644 (file)
@@ -51,10 +51,6 @@ struct radv_shader_args {
    struct ac_arg ngg_viewport_scale[2];
    struct ac_arg ngg_viewport_translate[2];
 
-   /* Task shaders */
-   struct ac_arg task_ib_addr;
-   struct ac_arg task_ib_stride;
-
    /* Fragment shaders */
    struct ac_arg ps_epilog_pc;
    struct ac_arg ps_num_samples;
index 9d645a0..27cd0f0 100644 (file)
@@ -514,7 +514,6 @@ gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info)
     * - drawing 1 input vertex ~ launching 1 mesh shader workgroup
     *
     * In the shader:
-    * - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
     * - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
     *
     * Notes:
@@ -711,10 +710,6 @@ gather_shader_info_task(const nir_shader *nir, struct radv_shader_info *info)
     * use them.
     */
 
-   /* Needed to address the IB to read firstTask in NV_mesh_shader. */
-   info->vs.needs_draw_id |=
-      BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID);
-
    /* Needed to address the task draw/payload rings. */
    info->cs.uses_block_id[0] = true;
    info->cs.uses_block_id[1] = true;