From 3798f99c4693dbf83e93512f47b8af55c651ebba Mon Sep 17 00:00:00 2001 From: Karmjit Mahil Date: Mon, 26 Jun 2023 11:52:39 +0100 Subject: [PATCH] pvr: Submit PR commands This commit adds a partial render (PR) command to job submission. For geometry-only jobs we must always submit a PR command in case we enter SPM. For now, we'll also always submit a PR command for geom+frag jobs. Signed-off-by: Karmjit Mahil Reviewed-by: Frank Binns Part-of: --- src/imagination/vulkan/pvr_cmd_buffer.c | 26 ++-- src/imagination/vulkan/pvr_job_render.c | 138 +++++++++++++++++++-- src/imagination/vulkan/pvr_job_render.h | 3 + src/imagination/vulkan/winsys/pvr_winsys.h | 6 +- .../vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c | 50 +++++--- 5 files changed, 182 insertions(+), 41 deletions(-) diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 1cfe83e..805c53c 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -1459,6 +1459,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, job->pds_bgnd_reg_values); } + /* TODO: In some cases a PR can be removed by storing to the color attachment + * and have the background object load directly from it instead of using the + * scratch buffer. In those cases we can also set this to "false" and avoid + * extra fw overhead. + */ + /* The scratch buffer is always needed and allocated to avoid data loss in + * case SPM is hit so set the flag unconditionally. 
+ */ + job->requires_spm_scratch_buffer = true; + + memcpy(job->pr_pbe_reg_words, + &framebuffer->spm_eot_state_per_render[0].pbe_reg_words, + sizeof(job->pr_pbe_reg_words)); + job->pr_pds_pixel_event_data_offset = + framebuffer->spm_eot_state_per_render[0].pixel_event_program_data_offset; + STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) == ARRAY_SIZE(spm_bgobj_state->pds_reg_values)); typed_memcpy(job->pds_pr_bgnd_reg_values, @@ -1694,16 +1710,6 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, job->run_frag = true; job->geometry_terminate = true; - /* TODO: In some cases a PR can be removed by storing to the color attachment - * and have the background object load directly from it instead of using the - * scratch buffer. In those cases we can also set this to "false" and avoid - * extra fw overhead. - */ - /* The scratch buffer is always needed and allocated to avoid data loss in - * case SPM is hit so set the flag unconditionally. - */ - job->requires_spm_scratch_buffer = true; - return VK_SUCCESS; } diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c index 35a6bb9..3354d80 100644 --- a/src/imagination/vulkan/pvr_job_render.c +++ b/src/imagination/vulkan/pvr_job_render.c @@ -1064,6 +1064,55 @@ pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx, pvr_geom_state_flags_init(job, &state->flags); } +static inline uint32_t pvr_frag_km_stream_pbe_reg_words_offset( + const struct pvr_device_info *const dev_info) +{ + uint32_t offset = 0; + + offset += pvr_cmd_length(KMD_STREAM_HDR); + offset += pvr_cmd_length(CR_ISP_SCISSOR_BASE); + offset += pvr_cmd_length(CR_ISP_DBIAS_BASE); + offset += pvr_cmd_length(CR_ISP_OCLQRY_BASE); + offset += pvr_cmd_length(CR_ISP_ZLSCTL); + offset += pvr_cmd_length(CR_ISP_ZLOAD_BASE); + offset += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE); + + if (PVR_HAS_FEATURE(dev_info, requires_fb_cdc_zls_setup)) + offset += pvr_cmd_length(CR_FB_CDC_ZLS); + + 
return PVR_DW_TO_BYTES(offset); +} + +#define DWORDS_PER_U64 2 + +static inline uint32_t pvr_frag_km_stream_pds_eot_data_addr_offset( + const struct pvr_device_info *const dev_info) +{ + uint32_t offset = 0; + + offset += pvr_frag_km_stream_pbe_reg_words_offset(dev_info) / 4U; + offset += + PVR_MAX_COLOR_ATTACHMENTS * ROGUE_NUM_PBESTATE_REG_WORDS * DWORDS_PER_U64; + offset += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM); + offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64; + offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64; + offset += PVRX(KMD_STREAM_USC_CLEAR_REGISTER_COUNT) * + pvr_cmd_length(CR_USC_CLEAR_REGISTER); + offset += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL); + offset += pvr_cmd_length(CR_ISP_BGOBJDEPTH); + offset += pvr_cmd_length(CR_ISP_BGOBJVALS); + offset += pvr_cmd_length(CR_ISP_AA); + offset += pvr_cmd_length(CR_ISP_CTL); + offset += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO); + + if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) + offset += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM); + + offset += pvr_cmd_length(KMD_STREAM_VIEW_IDX); + + return PVR_DW_TO_BYTES(offset); +} + static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, struct pvr_render_job *job, struct pvr_winsys_fragment_state *state) @@ -1197,7 +1246,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, stream_ptr += pvr_cmd_length(CR_FB_CDC_ZLS); } -#define DWORDS_PER_U64 2 + /* Make sure that the pvr_frag_km_...() function is returning the correct + * offset. 
+ */ + assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream == + pvr_frag_km_stream_pbe_reg_words_offset(dev_info)); STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == PVR_MAX_COLOR_ATTACHMENTS); STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) == @@ -1338,6 +1391,12 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, /* clang-format on */ stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX); + /* Make sure that the pvr_frag_km_...() function is returning the correct + * offset. + */ + assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream == + pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info)); + pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) { value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset); } @@ -1388,6 +1447,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx, } } +#undef DWORDS_PER_U64 + static void pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx, struct pvr_render_job *job, @@ -1452,6 +1513,53 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, pvr_frag_state_flags_init(job, &state->flags); } +/** + * \brief Sets up the fragment state for a Partial Render (PR) based on the + * state for a normal fragment job. + * + * The state of a fragment PR is almost the same as that of a normal fragment + * job, apart from the PBE words and the EOT program, both of which are + * necessary for the render to use the SPM scratch buffer instead of the final + * render targets. + * + * By basing the fragment PR state on that of a normal fragment state, + * repacking of the same words can be avoided as we end up mostly doing copies + * instead. 
+ */ +static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state( + const struct pvr_render_ctx *ctx, + struct pvr_render_job *job, + struct vk_sync *wait, + const struct pvr_winsys_fragment_state *frag, + struct pvr_winsys_fragment_state *state) +{ + const struct pvr_device_info *const dev_info = + &ctx->device->pdevice->dev_info; + const uint32_t pbe_reg_byte_offset = + pvr_frag_km_stream_pbe_reg_words_offset(dev_info); + const uint32_t eot_data_addr_byte_offset = + pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info); + + /* Massive copy :( `wait` is unused; state->wait is inherited from *frag. */ + *state = *frag; + + assert(state->fw_stream_len >= + pbe_reg_byte_offset + sizeof(job->pr_pbe_reg_words)); + memcpy(&state->fw_stream[pbe_reg_byte_offset], + job->pr_pbe_reg_words, + sizeof(job->pr_pbe_reg_words)); + + /* TODO: Update this when csbgen is byte instead of dword granular. */ + assert(state->fw_stream_len >= + eot_data_addr_byte_offset + + PVR_DW_TO_BYTES(pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA))); + pvr_csb_pack ((uint32_t *)&state->fw_stream[eot_data_addr_byte_offset], + CR_EVENT_PIXEL_PDS_DATA, + eot_pds_data) { + eot_pds_data.addr = PVR_DEV_ADDR(job->pr_pds_pixel_event_data_offset); + } +} + static void pvr_render_job_ws_submit_info_init( struct pvr_render_ctx *ctx, struct pvr_render_job *job, @@ -1472,14 +1580,28 @@ static void pvr_render_job_ws_submit_info_init( wait_geom, &submit_info->geometry); - if (job->run_frag) { - submit_info->run_frag = true; + submit_info->has_fragment_job = job->run_frag; - pvr_render_job_ws_fragment_state_init(ctx, - job, - wait_frag, - &submit_info->fragment); - } + /* TODO: Move the job setup from queue submit into cmd_buffer if possible. */ + + /* TODO: See if it's worth avoiding setting up the fragment state and set up + * the pr state directly if `!job->run_frag`. For now we'll always set it up. 
+ */ + pvr_render_job_ws_fragment_state_init(ctx, + job, + wait_frag, + &submit_info->fragment); + + /* TODO: In some cases we could eliminate the PR and use the frag directly in + * case we enter SPM. There's likely some performance improvement to be had + * there. For now we'll always set up the PR. + */ + pvr_render_job_ws_fragment_pr_init_based_on_fragment_state( + ctx, + job, + wait_frag, + &submit_info->fragment, + &submit_info->fragment_pr); } VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx, diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h index ae21c3e..042cef4 100644 --- a/src/imagination/vulkan/pvr_job_render.h +++ b/src/imagination/vulkan/pvr_job_render.h @@ -86,6 +86,7 @@ struct pvr_render_job { }; uint32_t pds_pixel_event_data_offset; + uint32_t pr_pds_pixel_event_data_offset; pvr_dev_addr_t ctrl_stream_addr; @@ -147,6 +148,8 @@ struct pvr_render_job { "Cannot store both PBESTATE_REG_WORD{0,1}"); uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS] [ROGUE_NUM_PBESTATE_REG_WORDS]; + uint64_t pr_pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS] + [ROGUE_NUM_PBESTATE_REG_WORDS]; static_assert(pvr_cmd_length(CR_PDS_BGRND0_BASE) == 2, "CR_PDS_BGRND0_BASE cannot be stored in uint64_t"); diff --git a/src/imagination/vulkan/winsys/pvr_winsys.h b/src/imagination/vulkan/winsys/pvr_winsys.h index a8efc5a..e232ac9 100644 --- a/src/imagination/vulkan/winsys/pvr_winsys.h +++ b/src/imagination/vulkan/winsys/pvr_winsys.h @@ -314,9 +314,7 @@ struct pvr_winsys_render_submit_info { uint32_t frame_num; uint32_t job_num; - - /* FIXME: should this be flags instead? */ - bool run_frag; + bool has_fragment_job; struct pvr_winsys_geometry_state { /* Firmware stream buffer. 
This is the maximum possible size taking into @@ -351,7 +349,7 @@ struct pvr_winsys_render_submit_info { } flags; struct vk_sync *wait; - } fragment; + } fragment, fragment_pr; }; struct pvr_winsys_ops { diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c index c5f9872..708f864 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c @@ -879,17 +879,17 @@ static void pvr_srv_fragment_cmd_ext_stream_load( assert((const uint8_t *)ext_stream_ptr - stream == stream_len); } -static void pvr_srv_fragment_cmd_init( - const struct pvr_winsys_render_submit_info *submit_info, - struct rogue_fwif_cmd_3d *cmd, - const struct pvr_device_info *dev_info) +static void +pvr_srv_fragment_cmd_init(struct rogue_fwif_cmd_3d *cmd, + const struct pvr_winsys_fragment_state *state, + const struct pvr_device_info *dev_info, + uint32_t frame_num) { - const struct pvr_winsys_fragment_state *state = &submit_info->fragment; uint32_t ext_stream_offset; memset(cmd, 0, sizeof(*cmd)); - cmd->cmd_shared.cmn.frame_num = submit_info->frame_num; + cmd->cmd_shared.cmn.frame_num = frame_num; ext_stream_offset = pvr_srv_fragment_cmd_stream_load(cmd, state->fw_stream, @@ -944,7 +944,11 @@ VkResult pvr_srv_winsys_render_submit( struct pvr_srv_sync *srv_signal_sync_frag; struct rogue_fwif_cmd_ta geom_cmd; - struct rogue_fwif_cmd_3d frag_cmd; + struct rogue_fwif_cmd_3d frag_cmd = { 0 }; + struct rogue_fwif_cmd_3d pr_cmd = { 0 }; + + uint8_t *frag_cmd_ptr = NULL; + uint32_t frag_cmd_size = 0; uint32_t current_sync_value = sync_prim->value; uint32_t geom_sync_update_value; @@ -962,10 +966,20 @@ VkResult pvr_srv_winsys_render_submit( pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info); - if (submit_info->run_frag) - pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info); - else - memset(&frag_cmd, 0, sizeof(frag_cmd)); + 
pvr_srv_fragment_cmd_init(&pr_cmd, + &submit_info->fragment_pr, + dev_info, + submit_info->frame_num); + + if (submit_info->has_fragment_job) { + pvr_srv_fragment_cmd_init(&frag_cmd, + &submit_info->fragment, + dev_info, + submit_info->frame_num); + + frag_cmd_ptr = (uint8_t *)&frag_cmd; + frag_cmd_size = sizeof(frag_cmd); + } if (submit_info->geometry.wait) { struct pvr_srv_sync *srv_wait_sync = @@ -1005,7 +1019,7 @@ VkResult pvr_srv_winsys_render_submit( /* Geometery is always kicked */ geom_sync_update_value = ++current_sync_value; - if (submit_info->run_frag) { + if (submit_info->has_fragment_job) { frag_sync_update_count = 1; frag_sync_update_value = ++current_sync_value; } @@ -1044,18 +1058,16 @@ VkResult pvr_srv_winsys_render_submit( "FRAG", sizeof(geom_cmd), (uint8_t *)&geom_cmd, - /* Currently no support for PRs. */ - 0, - /* Currently no support for PRs. */ - NULL, - sizeof(frag_cmd), - (uint8_t *)&frag_cmd, + sizeof(pr_cmd), + (uint8_t *)&pr_cmd, + frag_cmd_size, + frag_cmd_ptr, submit_info->job_num, /* Always kick the TA. */ true, /* Always kick a PR. */ true, - submit_info->run_frag, + submit_info->has_fragment_job, false, 0, rt_data_handle, -- 2.7.4