job->pds_bgnd_reg_values);
}
+ /* TODO: In some cases a PR can be removed by storing to the color attachment
+ * and have the background object load directly from it instead of using the
+ * scratch buffer. In those cases we can also set this to "false" and avoid
+ * extra fw overhead.
+ */
+ /* The scratch buffer is always needed and allocated to avoid data loss in
+ * case SPM is hit so set the flag unconditionally.
+ */
+ job->requires_spm_scratch_buffer = true;
+
+ /* Point the PR at the SPM EOT state so a partial render writes to the
+  * scratch buffer rather than the final attachments. Size the copy by the
+  * DESTINATION array (pr_pbe_reg_words, matching the PR-side copy in
+  * pvr_render_job_ws_fragment_pr_init_based_on_fragment_state()) so this
+  * stays correct if the PR and non-PR PBE word arrays ever diverge.
+  */
+ memcpy(job->pr_pbe_reg_words,
+ &framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
+ sizeof(job->pr_pbe_reg_words));
+ job->pr_pds_pixel_event_data_offset =
+ framebuffer->spm_eot_state_per_render[0].pixel_event_program_data_offset;
+
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->pds_pr_bgnd_reg_values,
job->run_frag = true;
job->geometry_terminate = true;
- /* TODO: In some cases a PR can be removed by storing to the color attachment
- * and have the background object load directly from it instead of using the
- * scratch buffer. In those cases we can also set this to "false" and avoid
- * extra fw overhead.
- */
- /* The scratch buffer is always needed and allocated to avoid data loss in
- * case SPM is hit so set the flag unconditionally.
- */
- job->requires_spm_scratch_buffer = true;
-
return VK_SUCCESS;
}
pvr_geom_state_flags_init(job, &state->flags);
}
+/* Compute the byte offset of the PBE register words within the fragment KMD
+ * firmware stream.
+ *
+ * The accumulation below must mirror, field for field and in order, what
+ * pvr_frag_state_stream_init() packs ahead of the PBE words; the runtime
+ * assert added in that function cross-checks the two stay in sync. Keep any
+ * future stream layout change reflected here as well.
+ */
+static inline uint32_t pvr_frag_km_stream_pbe_reg_words_offset(
+ const struct pvr_device_info *const dev_info)
+{
+ /* Accumulated in dwords (pvr_cmd_length() granularity), converted to
+ * bytes on return.
+ */
+ uint32_t offset = 0;
+
+ offset += pvr_cmd_length(KMD_STREAM_HDR);
+ offset += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
+ offset += pvr_cmd_length(CR_ISP_DBIAS_BASE);
+ offset += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
+ offset += pvr_cmd_length(CR_ISP_ZLSCTL);
+ offset += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
+ offset += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
+
+ /* Optional field: only packed on cores that need FB CDC ZLS setup. */
+ if (PVR_HAS_FEATURE(dev_info, requires_fb_cdc_zls_setup))
+ offset += pvr_cmd_length(CR_FB_CDC_ZLS);
+
+ return PVR_DW_TO_BYTES(offset);
+}
+
+#define DWORDS_PER_U64 2
+
+/* Compute the byte offset of CR_EVENT_PIXEL_PDS_DATA (the end-of-tile PDS
+ * program data address) within the fragment KMD firmware stream.
+ *
+ * As with pvr_frag_km_stream_pbe_reg_words_offset(), this must mirror the
+ * packing order of pvr_frag_state_stream_init() exactly; the runtime assert
+ * added there cross-checks it.
+ */
+static inline uint32_t pvr_frag_km_stream_pds_eot_data_addr_offset(
+ const struct pvr_device_info *const dev_info)
+{
+ /* Accumulated in dwords, converted to bytes on return. */
+ uint32_t offset = 0;
+
+ /* Start past the PBE words. The helper returns bytes, so divide back to
+ * dwords before accumulating further dword-granular lengths.
+ */
+ offset += pvr_frag_km_stream_pbe_reg_words_offset(dev_info) / 4U;
+ offset +=
+ PVR_MAX_COLOR_ATTACHMENTS * ROGUE_NUM_PBESTATE_REG_WORDS * DWORDS_PER_U64;
+ offset += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
+ /* Two PDS background register blocks are packed back to back — presumably
+ * the normal and the partial-render background object words; confirm
+ * against the stream-init packing if this ever changes.
+ */
+ offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
+ offset += ROGUE_NUM_CR_PDS_BGRND_WORDS * DWORDS_PER_U64;
+ offset += PVRX(KMD_STREAM_USC_CLEAR_REGISTER_COUNT) *
+ pvr_cmd_length(CR_USC_CLEAR_REGISTER);
+ offset += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
+ offset += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
+ offset += pvr_cmd_length(CR_ISP_BGOBJVALS);
+ offset += pvr_cmd_length(CR_ISP_AA);
+ offset += pvr_cmd_length(CR_ISP_CTL);
+ offset += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
+
+ /* Optional field: only packed on cores with cluster grouping. */
+ if (PVR_HAS_FEATURE(dev_info, cluster_grouping))
+ offset += pvr_cmd_length(KMD_STREAM_PIXEL_PHANTOM);
+
+ offset += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
+
+ return PVR_DW_TO_BYTES(offset);
+}
+
static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
struct pvr_winsys_fragment_state *state)
stream_ptr += pvr_cmd_length(CR_FB_CDC_ZLS);
}
-#define DWORDS_PER_U64 2
+ /* Make sure that the pvr_frag_km_...() function is returning the correct
+ * offset.
+ */
+ assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
+ pvr_frag_km_stream_pbe_reg_words_offset(dev_info));
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == PVR_MAX_COLOR_ATTACHMENTS);
STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) ==
/* clang-format on */
stream_ptr += pvr_cmd_length(KMD_STREAM_VIEW_IDX);
+ /* Make sure that the pvr_frag_km_...() function is returning the correct
+ * offset.
+ */
+ assert((uint8_t *)stream_ptr - (uint8_t *)state->fw_stream ==
+ pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info));
+
pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset);
}
}
}
+#undef DWORDS_PER_U64
+
static void
pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
pvr_frag_state_flags_init(job, &state->flags);
}
+/**
+ * \brief Sets up the fragment state for a Partial Render (PR) based on the
+ * state for a normal fragment job.
+ *
+ * The state of a fragment PR is almost the same as of that for a normal
+ * fragment job apart the PBE words and the EOT program, both of which are
+ * necessary for the render to use the SPM scratch buffer instead of the final
+ * render targets.
+ *
+ * By basing the fragment PR state on that of a normal fragment state,
+ * repacking of the same words can be avoided as we end up mostly doing copies
+ * instead.
+ *
+ * \param frag  Fully initialized normal fragment state to base the PR on.
+ * \param state Output PR fragment state.
+ */
+static void pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
+ const struct pvr_render_ctx *ctx,
+ struct pvr_render_job *job,
+ struct vk_sync *wait,
+ struct pvr_winsys_fragment_state *frag,
+ struct pvr_winsys_fragment_state *state)
+{
+ /* NOTE(review): 'wait' is currently unused here — presumably kept for
+ * signature symmetry with pvr_render_job_ws_fragment_state_init();
+ * confirm intent.
+ */
+ const struct pvr_device_info *const dev_info =
+ &ctx->device->pdevice->dev_info;
+ const uint32_t pbe_reg_byte_offset =
+ pvr_frag_km_stream_pbe_reg_words_offset(dev_info);
+ const uint32_t eot_data_addr_byte_offset =
+ pvr_frag_km_stream_pds_eot_data_addr_offset(dev_info);
+
+ /* Massive copy :( */
+ *state = *frag;
+
+ /* Overwrite just the PBE words in the copied stream so the PR writes to
+ * the SPM scratch buffer instead of the final render targets.
+ */
+ assert(state->fw_stream_len >=
+ pbe_reg_byte_offset + sizeof(job->pr_pbe_reg_words));
+ memcpy(&state->fw_stream[pbe_reg_byte_offset],
+ job->pr_pbe_reg_words,
+ sizeof(job->pr_pbe_reg_words));
+
+ /* TODO: Update this when csbgen is byte instead of dword granular. */
+ /* Repack only the EOT PDS data address so the PR runs its own end-of-tile
+ * program. NOTE(review): the uint32_t cast assumes the byte offset is
+ * 4-byte aligned — holds while the stream is packed dword granular.
+ */
+ assert(state->fw_stream_len >=
+ eot_data_addr_byte_offset +
+ PVR_DW_TO_BYTES(pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA)));
+ pvr_csb_pack ((uint32_t *)&state->fw_stream[eot_data_addr_byte_offset],
+ CR_EVENT_PIXEL_PDS_DATA,
+ eot_pds_data) {
+ eot_pds_data.addr = PVR_DEV_ADDR(job->pr_pds_pixel_event_data_offset);
+ }
+}
+
static void pvr_render_job_ws_submit_info_init(
struct pvr_render_ctx *ctx,
struct pvr_render_job *job,
wait_geom,
&submit_info->geometry);
- if (job->run_frag) {
- submit_info->run_frag = true;
+ submit_info->has_fragment_job = job->run_frag;
- pvr_render_job_ws_fragment_state_init(ctx,
- job,
- wait_frag,
- &submit_info->fragment);
- }
+ /* TODO: Move the job setup from queue submit into cmd_buffer if possible. */
+
+ /* TODO: See if it's worth avoiding setting up the fragment state and setup
+ * the pr state directly if `!job->run_frag`. For now we'll always set it up.
+ */
+ pvr_render_job_ws_fragment_state_init(ctx,
+ job,
+ wait_frag,
+ &submit_info->fragment);
+
+ /* TODO: In some cases we could eliminate the pr and use the frag directly in
+ * case we enter SPM. There's likely some performance improvement to be had
+ * there. For now we'll always setup the pr.
+ */
+ pvr_render_job_ws_fragment_pr_init_based_on_fragment_state(
+ ctx,
+ job,
+ wait_frag,
+ &submit_info->fragment,
+ &submit_info->fragment_pr);
}
VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
assert((const uint8_t *)ext_stream_ptr - stream == stream_len);
}
-static void pvr_srv_fragment_cmd_init(
- const struct pvr_winsys_render_submit_info *submit_info,
- struct rogue_fwif_cmd_3d *cmd,
- const struct pvr_device_info *dev_info)
+static void
+pvr_srv_fragment_cmd_init(struct rogue_fwif_cmd_3d *cmd,
+ const struct pvr_winsys_fragment_state *state,
+ const struct pvr_device_info *dev_info,
+ uint32_t frame_num)
{
- const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
uint32_t ext_stream_offset;
memset(cmd, 0, sizeof(*cmd));
- cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
+ cmd->cmd_shared.cmn.frame_num = frame_num;
ext_stream_offset = pvr_srv_fragment_cmd_stream_load(cmd,
state->fw_stream,
struct pvr_srv_sync *srv_signal_sync_frag;
struct rogue_fwif_cmd_ta geom_cmd;
- struct rogue_fwif_cmd_3d frag_cmd;
+ struct rogue_fwif_cmd_3d frag_cmd = { 0 };
+ struct rogue_fwif_cmd_3d pr_cmd = { 0 };
+
+ uint8_t *frag_cmd_ptr = NULL;
+ uint32_t frag_cmd_size = 0;
uint32_t current_sync_value = sync_prim->value;
uint32_t geom_sync_update_value;
pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
- if (submit_info->run_frag)
- pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info);
- else
- memset(&frag_cmd, 0, sizeof(frag_cmd));
+ pvr_srv_fragment_cmd_init(&pr_cmd,
+ &submit_info->fragment_pr,
+ dev_info,
+ submit_info->frame_num);
+
+ if (submit_info->has_fragment_job) {
+ pvr_srv_fragment_cmd_init(&frag_cmd,
+ &submit_info->fragment,
+ dev_info,
+ submit_info->frame_num);
+
+ frag_cmd_ptr = (uint8_t *)&frag_cmd;
+ frag_cmd_size = sizeof(frag_cmd);
+ }
if (submit_info->geometry.wait) {
struct pvr_srv_sync *srv_wait_sync =
/* Geometery is always kicked */
geom_sync_update_value = ++current_sync_value;
- if (submit_info->run_frag) {
+ if (submit_info->has_fragment_job) {
frag_sync_update_count = 1;
frag_sync_update_value = ++current_sync_value;
}
"FRAG",
sizeof(geom_cmd),
(uint8_t *)&geom_cmd,
- /* Currently no support for PRs. */
- 0,
- /* Currently no support for PRs. */
- NULL,
- sizeof(frag_cmd),
- (uint8_t *)&frag_cmd,
+ sizeof(pr_cmd),
+ (uint8_t *)&pr_cmd,
+ frag_cmd_size,
+ frag_cmd_ptr,
submit_info->job_num,
/* Always kick the TA. */
true,
/* Always kick a PR. */
true,
- submit_info->run_frag,
+ submit_info->has_fragment_job,
false,
0,
rt_data_handle,