radeonsi: implement fw based mcbp
author Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Fri, 17 Mar 2023 13:44:42 +0000 (14:44 +0100)
committer Marge Bot <emma+marge@anholt.net>
Tue, 25 Apr 2023 06:47:11 +0000 (06:47 +0000)
Some chips support firmware-based MCBP (mid-command-buffer preemption).
If it is supported, radeonsi needs to allocate 3 buffers and pass them to the firmware.

From there, the firmware will handle MCBP and register shadowing
on its own, so we don't need to insert LOAD packets in the preamble.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986>

src/amd/common/ac_shadowed_regs.c
src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_draw.cpp
src/gallium/drivers/radeonsi/si_state_shaders.cpp
src/gallium/include/winsys/radeon_winsys.h
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
src/gallium/winsys/amdgpu/drm/amdgpu_cs.h

index f060d55..834bda3 100644 (file)
@@ -4288,6 +4288,8 @@ void ac_create_shadowing_ib_preamble(const struct radeon_info *info,
                CC1_SHADOW_GFX_SH_REGS(1) |
                CC1_SHADOW_GLOBAL_UCONFIG(1));
 
-   for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++)
-      ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address);
+   if (!info->has_fw_based_shadowing) {
+      for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++)
+         ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address);
+   }
 }
index ecdbc5e..e584186 100644 (file)
@@ -41,22 +41,43 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
    if (sctx->has_graphics &&
        (sctx->screen->info.mid_command_buffer_preemption_enabled ||
         sctx->screen->debug_flags & DBG(SHADOW_REGS))) {
-      sctx->shadowed_regs =
-            si_aligned_buffer_create(sctx->b.screen,
-                                     PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
-                                     PIPE_USAGE_DEFAULT,
-                                     SI_SHADOWED_REG_BUFFER_SIZE,
-                                     4096);
-      if (!sctx->shadowed_regs)
-         fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n");
+      if (sctx->screen->info.has_fw_based_shadowing) {
+         sctx->shadowing.registers =
+               si_aligned_buffer_create(sctx->b.screen,
+                                        PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                        PIPE_USAGE_DEFAULT,
+                                        sctx->screen->info.fw_based_mcbp.shadow_size,
+                                        sctx->screen->info.fw_based_mcbp.shadow_alignment);
+         sctx->shadowing.csa =
+               si_aligned_buffer_create(sctx->b.screen,
+                                        PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                        PIPE_USAGE_DEFAULT,
+                                        sctx->screen->info.fw_based_mcbp.csa_size,
+                                        sctx->screen->info.fw_based_mcbp.csa_alignment);
+         if (!sctx->shadowing.registers || !sctx->shadowing.csa)
+            fprintf(stderr, "radeonsi: cannot create register shadowing buffer(s)\n");
+         else
+            sctx->ws->cs_set_mcbp_reg_shadowing_va(&sctx->gfx_cs,
+                                                   sctx->shadowing.registers->gpu_address,
+                                                   sctx->shadowing.csa->gpu_address);
+      } else {
+         sctx->shadowing.registers =
+               si_aligned_buffer_create(sctx->b.screen,
+                                        PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                        PIPE_USAGE_DEFAULT,
+                                        SI_SHADOWED_REG_BUFFER_SIZE,
+                                        4096);
+         if (!sctx->shadowing.registers)
+            fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n");
+      }
    }
 
-   si_init_cs_preamble_state(sctx, sctx->shadowed_regs != NULL);
+   si_init_cs_preamble_state(sctx, sctx->shadowing.registers != NULL);
 
-   if (sctx->shadowed_regs) {
+   if (sctx->shadowing.registers) {
       /* We need to clear the shadowed reg buffer. */
-      si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowed_regs->b.b,
-                             0, sctx->shadowed_regs->bo_size, 0, SI_OP_SYNC_AFTER,
+      si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowing.registers->b.b,
+                             0, sctx->shadowing.registers->bo_size, 0, SI_OP_SYNC_AFTER,
                              SI_COHERENCY_CP, L2_BYPASS);
 
       /* Create the shadowing preamble. */
@@ -72,11 +93,14 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
 
       ac_create_shadowing_ib_preamble(&sctx->screen->info,
                                       (pm4_cmd_add_fn)si_pm4_cmd_add, shadowing_preamble,
-                                      sctx->shadowed_regs->gpu_address, sctx->screen->dpbb_allowed);
+                                      sctx->shadowing.registers->gpu_address, sctx->screen->dpbb_allowed);
 
       /* Initialize shadowed registers as follows. */
-      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowed_regs,
+      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.registers,
                                 RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
+      if (sctx->shadowing.csa)
+         radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.csa,
+                                   RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
       si_pm4_emit(sctx, shadowing_preamble);
       ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, si_set_context_reg_array);
       si_pm4_emit(sctx, sctx->cs_preamble_state);
index 8116d33..5eb2161 100644 (file)
@@ -2184,7 +2184,7 @@ static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_de
       radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
       radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
       radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
-   } else if (sctx->gfx_level == GFX9 && sctx->shadowed_regs) {
+   } else if (sctx->gfx_level == GFX9 && sctx->shadowing.registers) {
       /* We can't use the COMMON registers with register shadowing. */
       radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
       radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
index e6e94ce..be2d353 100644 (file)
@@ -417,9 +417,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
       radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->border_color_buffer,
                                 RADEON_USAGE_READ | RADEON_PRIO_BORDER_COLORS);
    }
-   if (ctx->shadowed_regs) {
-      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowed_regs,
+   if (ctx->shadowing.registers) {
+      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.registers,
                                 RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
+
+      if (ctx->shadowing.csa)
+         radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.csa,
+                                   RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
    }
 
    si_add_all_descriptors_to_bo_list(ctx);
@@ -484,7 +488,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
    if (ctx->screen->use_ngg_culling)
       si_mark_atom_dirty(ctx, &ctx->atoms.s.ngg_cull_state);
 
-   if (first_cs || !ctx->shadowed_regs) {
+   if (first_cs || !ctx->shadowing.registers) {
       /* These don't add any buffers, so skip them with shadowing. */
       si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs);
       /* CLEAR_STATE sets zeros. */
index 08fe458..ab42506 100644 (file)
@@ -350,7 +350,8 @@ static void si_destroy_context(struct pipe_context *context)
    sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
    si_resource_reference(&sctx->eop_bug_scratch, NULL);
    si_resource_reference(&sctx->eop_bug_scratch_tmz, NULL);
-   si_resource_reference(&sctx->shadowed_regs, NULL);
+   si_resource_reference(&sctx->shadowing.registers, NULL);
+   si_resource_reference(&sctx->shadowing.csa, NULL);
 
    si_destroy_compiler(&sctx->compiler);
 
index 282135b..e127e0a 100644 (file)
@@ -962,7 +962,14 @@ struct si_context {
    struct u_log_context *log;
    void *query_result_shader;
    void *sh_query_result_shader;
-   struct si_resource *shadowed_regs;
+   struct {
+      /* Memory where the shadowed registers will be saved and loaded from. */
+      struct si_resource *registers;
+      /* Context Save Area: scratch area to save other required data. Only
+       * used if info->has_fw_based_shadowing is true.
+       */
+      struct si_resource *csa;
+   } shadowing;
 
    void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs);
 
index 71196c0..8bd203a 100644 (file)
@@ -1471,7 +1471,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
    /* draw packet */
    if (index_size) {
       /* Register shadowing doesn't shadow INDEX_TYPE. */
-      if (index_size != sctx->last_index_size || sctx->shadowed_regs ||
+      if (index_size != sctx->last_index_size || sctx->shadowing.registers ||
           (GFX_VERSION == GFX10_3 && disable_instance_packing != sctx->disable_instance_packing)) {
          unsigned index_type;
 
@@ -1598,7 +1598,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
       }
    } else {
       /* Register shadowing requires that we always emit PKT3_NUM_INSTANCES. */
-      if (sctx->shadowed_regs ||
+      if (sctx->shadowing.registers ||
           sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
           sctx->last_instance_count != instance_count) {
          radeon_emit(PKT3(PKT3_NUM_INSTANCES, 0, 0));
index 63f23ff..ec58fa2 100644 (file)
@@ -3693,7 +3693,7 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx, bool tmz)
                                &sctx->cs_preamble_has_vgt_flush;
 
    /* We shouldn't get here if registers are shadowed. */
-   assert(!sctx->shadowed_regs);
+   assert(!sctx->shadowing.registers);
 
    if (*has_vgt_flush)
       return;
@@ -3810,7 +3810,7 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
                          false, 0, 0, 0);
    }
 
-   if (sctx->shadowed_regs) {
+   if (sctx->shadowing.registers) {
       /* These registers will be shadowed, so set them only once. */
       struct radeon_cmdbuf *cs = &sctx->gfx_cs;
 
@@ -4080,7 +4080,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
 
    assert((tf_ring_size_field & C_030938_SIZE) == 0);
 
-   if (sctx->shadowed_regs) {
+   if (sctx->shadowing.registers) {
       /* These registers will be shadowed, so set them only once. */
       /* TODO: tmz + shadowed_regs support */
       struct radeon_cmdbuf *cs = &sctx->gfx_cs;
index ad58108..46b9c96 100644 (file)
@@ -751,6 +751,12 @@ struct radeon_winsys {
     * Stable pstate
     */
    bool (*cs_set_pstate)(struct radeon_cmdbuf *cs, enum radeon_ctx_pstate state);
+
+   /**
+    * Pass the VAs of the buffers where various information is saved by the FW during mcbp.
+    */
+   void (*cs_set_mcbp_reg_shadowing_va)(struct radeon_cmdbuf *cs, uint64_t regs_va,
+                                                                  uint64_t csa_va);
 };
 
 static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)
index 8916002..b7e1b9c 100644 (file)
@@ -1490,7 +1490,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
    if (acs->ip_type == AMD_IP_GFX)
       ws->gfx_bo_list_counter += cs->num_real_buffers;
 
-   struct drm_amdgpu_cs_chunk chunks[7];
+   struct drm_amdgpu_cs_chunk chunks[8];
    unsigned num_chunks = 0;
 
    /* BO list */
@@ -1565,6 +1565,13 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
       num_chunks++;
    }
 
+   if (ws->info.has_fw_based_shadowing) {
+      chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_CP_GFX_SHADOW;
+      chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_cp_gfx_shadow) / 4;
+      chunks[num_chunks].chunk_data = (uintptr_t)&acs->mcbp_fw_shadow_chunk;
+      num_chunks++;
+   }
+
    /* Fence */
    if (has_user_fence) {
       chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
@@ -1674,6 +1681,9 @@ cleanup:
    if (r || noop)
       amdgpu_fence_signalled(cs->fence);
 
+   if (unlikely(ws->info.has_fw_based_shadowing && acs->mcbp_fw_shadow_chunk.flags && r == 0))
+      acs->mcbp_fw_shadow_chunk.flags = 0;
+
    cs->error_code = r;
 
    /* Only decrement num_active_ioctls for those buffers where we incremented it. */
@@ -1855,6 +1865,16 @@ static bool amdgpu_bo_is_referenced(struct radeon_cmdbuf *rcs,
    return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage);
 }
 
+static void amdgpu_cs_set_mcbp_reg_shadowing_va(struct radeon_cmdbuf *rcs,uint64_t regs_va,
+                                                                   uint64_t csa_va)
+{
+   struct amdgpu_cs *cs = amdgpu_cs(rcs);
+   cs->mcbp_fw_shadow_chunk.shadow_va = regs_va; /* address of the shadowed registers buffer */
+   cs->mcbp_fw_shadow_chunk.csa_va = csa_va; /* address of the context save area buffer */
+   cs->mcbp_fw_shadow_chunk.gds_va = 0; /* no GDS buffer address is passed */
+   cs->mcbp_fw_shadow_chunk.flags = AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+}
+
 void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
 {
    ws->base.ctx_create = amdgpu_ctx_create;
@@ -1880,4 +1900,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
    ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file;
    ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file;
    ws->base.export_signalled_sync_file = amdgpu_export_signalled_sync_file;
+
+   if (ws->aws->info.has_fw_based_shadowing)
+      ws->base.cs_set_mcbp_reg_shadowing_va = amdgpu_cs_set_mcbp_reg_shadowing_va;
 }
index 13b8bf7..5038463 100644 (file)
@@ -161,6 +161,8 @@ struct amdgpu_cs {
    struct util_queue_fence flush_completed;
    struct pipe_fence_handle *next_fence;
    struct pb_buffer *preamble_ib_bo;
+
+   struct drm_amdgpu_cs_chunk_cp_gfx_shadow mcbp_fw_shadow_chunk;
 };
 
 struct amdgpu_fence {