pvr: Submit PR commands
author Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Mon, 26 Jun 2023 10:52:39 +0000 (11:52 +0100)
committer Marge Bot <emma+marge@anholt.net>
Fri, 14 Jul 2023 10:45:49 +0000 (10:45 +0000)
This commit adds a partial render (PR) command to job submission.
For geometry-only jobs we must always submit a PR command in case we
enter SPM. For now, for geom+frag jobs, we'll also always submit
a PR command.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24138>

src/imagination/vulkan/pvr_cmd_buffer.c
src/imagination/vulkan/pvr_job_render.c
src/imagination/vulkan/pvr_job_render.h
src/imagination/vulkan/winsys/pvr_winsys.h
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c

index 1cfe83e..805c53c 100644 (file)
@@ -1459,6 +1459,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
                               job->pds_bgnd_reg_values);
    }
 
+   /* TODO: In some cases a PR can be removed by storing to the color attachment
+    * and have the background object load directly from it instead of using the
+    * scratch buffer. In those cases we can also set this to "false" and avoid
+    * extra fw overhead.
+    */
+   /* The scratch buffer is always needed and allocated to avoid data loss in
+    * case SPM is hit so set the flag unconditionally.
+    */
+   job->requires_spm_scratch_buffer = true;
+
+   memcpy(job->pr_pbe_reg_words,
+          &framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
+          sizeof(job->pbe_reg_words));
+   job->pr_pds_pixel_event_data_offset =
+      framebuffer->spm_eot_state_per_render[0].pixel_event_program_data_offset;
+
    STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
                  ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
    typed_memcpy(job->pds_pr_bgnd_reg_values,
@@ -1694,16 +1710,6 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
    job->run_frag = true;
    job->geometry_terminate = true;
 
-   /* TODO: In some cases a PR can be removed by storing to the color attachment
-    * and have the background object load directly from it instead of using the
-    * scratch buffer. In those cases we can also set this to "false" and avoid
-    * extra fw overhead.
-    */
-   /* The scratch buffer is always needed and allocated to avoid data loss in
-    * case SPM is hit so set the flag unconditionally.
-    */
-   job->requires_spm_scratch_buffer = true;
-
    return VK_SUCCESS;
 }
 
index 35a6bb9..3354d80 100644 (file)
@@ -1064,6 +1064,55 @@ pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
    pvr_geom_state_flags_init(job, &state->flags);
 }
 
+static inline uint32_t pvr_frag_km_stream_pbe_reg_words_offset(
+   const struct pvr_device_info *const dev_info)
+{
+   uint32_t offset = 0; /* Running total in dwords; returned in bytes. */
+   /* Lengths of the entries that precede the PBE reg words in the stream. */
+   offset += pvr_cmd_length(KMD_STREAM_HDR);
+   offset += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
+   offset += pvr_cmd_length(CR_ISP_DBIAS_BASE);
+   offset += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
+   offset += pvr_cmd_length(CR_ISP_ZLSCTL);
+   offset += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
+   offset += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
+   /* This entry is only counted when the device reports the feature. */
+   if (PVR_HAS_FEATURE(dev_info, requires_fb_cdc_zls_setup))
+      offset += pvr_cmd_length(CR_FB_CDC_ZLS);
+   /* pvr_frag_state_stream_init() asserts its write position matches this. */
+   return PVR_DW_TO_BYTES(offset);
+}
+
+#define DWORDS_PER_U64 2 /* A uint64_t entry takes up two stream dwords. */
+
+static inline uint32_t pvr_frag_km_stream_pds_eot_data_addr_offset(
+   const struct pvr_device_info *const dev_info)
+{
+   uint32_t offset = 0; /* Running total in dwords; returned in bytes. */
+   /* The helper returns bytes, so divide by 4 to get back to dwords. */
+   offset += pvr_frag_km_stream_pbe_reg_words_offset(dev_info) / 4U;
+   offset += /* uint64_t PBE reg words for every color attachment. */
+      PVR_MAX_COLOR_ATTACHMENTS * ROGUE_NUM_PBESTATE_REG_WORDS * DWORDS_PER_U64;
+   offset += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
+   offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
+   offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
+   offset += PVRX(KMD_STREAM_USC_CLEAR_REGISTER_COUNT) *
+             pvr_cmd_length(CR_USC_CLEAR_REGISTER);
+   offset += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
+   offset += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
+   offset += pvr_cmd_length(CR_ISP_BGOBJVALS);
+   offset += pvr_cmd_length(CR_ISP_AA);
+   offset += pvr_cmd_length(CR_ISP_CTL);
+   offset += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping))
+      offset += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM);
+
+   offset += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
+   /* pvr_frag_state_stream_init() asserts its write position matches this. */
+   return PVR_DW_TO_BYTES(offset);
+}
+
 static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
                                        struct pvr_render_job *job,
                                        struct pvr_winsys_fragment_state *state)
@@ -1197,7 +1246,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
       stream_ptr += pvr_cmd_length(CR_FB_CDC_ZLS);
    }
 
-#define DWORDS_PER_U64 2
+   /* Make sure that the pvr_frag_km_...() function is returning the correct
+    * offset.
+    */
+   assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
+          pvr_frag_km_stream_pbe_reg_words_offset(dev_info));
 
    STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == PVR_MAX_COLOR_ATTACHMENTS);
    STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) ==
@@ -1338,6 +1391,12 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
    /* clang-format on */
    stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
 
+   /* Make sure that the pvr_frag_km_...() function is returning the correct
+    * offset.
+    */
+   assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
+          pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info));
+
    pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
       value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset);
    }
@@ -1388,6 +1447,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
    }
 }
 
+#undef DWORDS_PER_U64
+
 static void
 pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx,
                                struct pvr_render_job *job,
@@ -1452,6 +1513,53 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
    pvr_frag_state_flags_init(job, &state->flags);
 }
 
+/**
+ * \brief Sets up the fragment state for a Partial Render (PR) based on the
+ * state for a normal fragment job.
+ *
+ * The PR fragment state is almost the same as that of a normal fragment job,
+ * apart from the PBE words and the EOT program data address, both of which
+ * are needed for the render to use the SPM scratch buffer instead of the
+ * final render targets. \p state is therefore a copy of \p frag with just
+ * those two fw stream entries overwritten from \p job, which avoids repacking
+ * all the words that are identical.
+ *
+ * \p frag must already be fully initialized; \p wait is currently unused.
+ */
+static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
+   const struct pvr_render_ctx *ctx,
+   struct pvr_render_job *job,
+   struct vk_sync *wait,
+   struct pvr_winsys_fragment_state *frag,
+   struct pvr_winsys_fragment_state *state)
+{
+   const struct pvr_device_info *const dev_info =
+      &ctx->device->pdevice->dev_info;
+   const uint32_t pbe_reg_byte_offset =
+      pvr_frag_km_stream_pbe_reg_words_offset(dev_info);
+   const uint32_t eot_data_addr_byte_offset =
+      pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info);
+
+   /* Massive copy :( Only the two entries patched below differ from frag. */
+   *state = *frag;
+   /* Swap in the PR PBE words, asserting they fit within the stream first. */
+   assert(state->fw_stream_len >=
+          pbe_reg_byte_offset + sizeof(job->pr_pbe_reg_words));
+   memcpy(&state->fw_stream[pbe_reg_byte_offset],
+          job->pr_pbe_reg_words,
+          sizeof(job->pr_pbe_reg_words));
+
+   /* TODO: Update this when csbgen is byte instead of dword granular. */
+   assert(state->fw_stream_len >=
+          eot_data_addr_byte_offset +
+             PVR_DW_TO_BYTES(pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA)));
+   pvr_csb_pack ((uint32_t *)&state->fw_stream[eot_data_addr_byte_offset],
+                 CR_EVENT_PIXEL_PDS_DATA,
+                 eot_pds_data) {
+      eot_pds_data.addr = PVR_DEV_ADDR(job->pr_pds_pixel_event_data_offset);
+   }
+}
+
 static void pvr_render_job_ws_submit_info_init(
    struct pvr_render_ctx *ctx,
    struct pvr_render_job *job,
@@ -1472,14 +1580,28 @@ static void pvr_render_job_ws_submit_info_init(
                                          wait_geom,
                                          &submit_info->geometry);
 
-   if (job->run_frag) {
-      submit_info->run_frag = true;
+   submit_info->has_fragment_job = job->run_frag;
 
-      pvr_render_job_ws_fragment_state_init(ctx,
-                                            job,
-                                            wait_frag,
-                                            &submit_info->fragment);
-   }
+   /* TODO: Move the job setup from queue submit into cmd_buffer if possible. */
+
+   /* TODO: See if it's worth avoiding setting up the fragment state and setup
+    * the pr state directly if `!job->run_frag`. For now we'll always set it up.
+    */
+   pvr_render_job_ws_fragment_state_init(ctx,
+                                         job,
+                                         wait_frag,
+                                         &submit_info->fragment);
+
+   /* TODO: In some cases we could eliminate the pr and use the frag directly in
+    * case we enter SPM. There's likely some performance improvement to be had
+    * there. For now we'll always setup the pr.
+    */
+   pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
+      ctx,
+      job,
+      wait_frag,
+      &submit_info->fragment,
+      &submit_info->fragment_pr);
 }
 
 VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
index ae21c3e..042cef4 100644 (file)
@@ -86,6 +86,7 @@ struct pvr_render_job {
    };
 
    uint32_t pds_pixel_event_data_offset;
+   uint32_t pr_pds_pixel_event_data_offset;
 
    pvr_dev_addr_t ctrl_stream_addr;
 
@@ -147,6 +148,8 @@ struct pvr_render_job {
                  "Cannot store both PBESTATE_REG_WORD{0,1}");
    uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
                          [ROGUE_NUM_PBESTATE_REG_WORDS];
+   uint64_t pr_pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
+                            [ROGUE_NUM_PBESTATE_REG_WORDS];
 
    static_assert(pvr_cmd_length(CR_PDS_BGRND0_BASE) == 2,
                  "CR_PDS_BGRND0_BASE cannot be stored in uint64_t");
index a8efc5a..e232ac9 100644 (file)
@@ -314,9 +314,7 @@ struct pvr_winsys_render_submit_info {
 
    uint32_t frame_num;
    uint32_t job_num;
-
-   /* FIXME: should this be flags instead? */
-   bool run_frag;
+   bool has_fragment_job;
 
    struct pvr_winsys_geometry_state {
       /* Firmware stream buffer. This is the maximum possible size taking into
@@ -351,7 +349,7 @@ struct pvr_winsys_render_submit_info {
       } flags;
 
       struct vk_sync *wait;
-   } fragment;
+   } fragment, fragment_pr;
 };
 
 struct pvr_winsys_ops {
index c5f9872..708f864 100644 (file)
@@ -879,17 +879,17 @@ static void pvr_srv_fragment_cmd_ext_stream_load(
    assert((const uint8_t *)ext_stream_ptr - stream == stream_len);
 }
 
-static void pvr_srv_fragment_cmd_init(
-   const struct pvr_winsys_render_submit_info *submit_info,
-   struct rogue_fwif_cmd_3d *cmd,
-   const struct pvr_device_info *dev_info)
+static void
+pvr_srv_fragment_cmd_init(struct rogue_fwif_cmd_3d *cmd,
+                          const struct pvr_winsys_fragment_state *state,
+                          const struct pvr_device_info *dev_info,
+                          uint32_t frame_num)
 {
-   const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
    uint32_t ext_stream_offset;
 
    memset(cmd, 0, sizeof(*cmd));
 
-   cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
+   cmd->cmd_shared.cmn.frame_num = frame_num;
 
    ext_stream_offset = pvr_srv_fragment_cmd_stream_load(cmd,
                                                         state->fw_stream,
@@ -944,7 +944,11 @@ VkResult pvr_srv_winsys_render_submit(
    struct pvr_srv_sync *srv_signal_sync_frag;
 
    struct rogue_fwif_cmd_ta geom_cmd;
-   struct rogue_fwif_cmd_3d frag_cmd;
+   struct rogue_fwif_cmd_3d frag_cmd = { 0 };
+   struct rogue_fwif_cmd_3d pr_cmd = { 0 };
+
+   uint8_t *frag_cmd_ptr = NULL;
+   uint32_t frag_cmd_size = 0;
 
    uint32_t current_sync_value = sync_prim->value;
    uint32_t geom_sync_update_value;
@@ -962,10 +966,20 @@ VkResult pvr_srv_winsys_render_submit(
 
    pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
 
-   if (submit_info->run_frag)
-      pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info);
-   else
-      memset(&frag_cmd, 0, sizeof(frag_cmd));
+   pvr_srv_fragment_cmd_init(&pr_cmd,
+                             &submit_info->fragment_pr,
+                             dev_info,
+                             submit_info->frame_num);
+
+   if (submit_info->has_fragment_job) {
+      pvr_srv_fragment_cmd_init(&frag_cmd,
+                                &submit_info->fragment,
+                                dev_info,
+                                submit_info->frame_num);
+
+      frag_cmd_ptr = (uint8_t *)&frag_cmd;
+      frag_cmd_size = sizeof(frag_cmd);
+   }
 
    if (submit_info->geometry.wait) {
       struct pvr_srv_sync *srv_wait_sync =
@@ -1005,7 +1019,7 @@ VkResult pvr_srv_winsys_render_submit(
    /* Geometery is always kicked */
    geom_sync_update_value = ++current_sync_value;
 
-   if (submit_info->run_frag) {
+   if (submit_info->has_fragment_job) {
       frag_sync_update_count = 1;
       frag_sync_update_value = ++current_sync_value;
    }
@@ -1044,18 +1058,16 @@ VkResult pvr_srv_winsys_render_submit(
                                         "FRAG",
                                         sizeof(geom_cmd),
                                         (uint8_t *)&geom_cmd,
-                                        /* Currently no support for PRs. */
-                                        0,
-                                        /* Currently no support for PRs. */
-                                        NULL,
-                                        sizeof(frag_cmd),
-                                        (uint8_t *)&frag_cmd,
+                                        sizeof(pr_cmd),
+                                        (uint8_t *)&pr_cmd,
+                                        frag_cmd_size,
+                                        frag_cmd_ptr,
                                         submit_info->job_num,
                                         /* Always kick the TA. */
                                         true,
                                         /* Always kick a PR. */
                                         true,
-                                        submit_info->run_frag,
+                                        submit_info->has_fragment_job,
                                         false,
                                         0,
                                         rt_data_handle,