.has_brn51764 = true,
.has_brn52354 = true,
.has_brn52942 = true,
- .has_brn56279 = true,
.has_brn58839 = true,
.has_brn62269 = true,
.has_brn66011 = true,
bool has_brn51764 : 1;
bool has_brn52354 : 1;
bool has_brn52942 : 1;
- bool has_brn56279 : 1;
bool has_brn58839 : 1;
bool has_brn62269 : 1;
bool has_brn66011 : 1;
pvr_xml_files = [
'rogue_cdm.xml',
'rogue_cr.xml',
+ 'rogue_fw.xml',
'rogue_ipf.xml',
'rogue_lls.xml',
'rogue_pbestate.xml',
<field name="mask" start="0" end="31" type="uint"/>
</struct>
- <struct name="PDS_CTRL" length="2">
- <field name="sm_overlap_enable" start="55" end="55" type="bool"/>
+ <struct name="CDM_ITEM" length="1">
+ <field name="mode" start="0" end="1" type="uint"/>
+ </struct>
+
+ <struct name="PDS_CTRL0" length="1">
<condition type="if" check="ROGUEXE"/>
<condition type="if" check="COMPUTE"/>
<field name="roguexe_max_num_cdm_tasks" start="24" end="31" type="uint"/>
<condition type="endif" check="ROGUEXE"/>
</struct>
+ <struct name="PDS_CTRL1" length="1">
+ <field name="sm_overlap_enable" start="23" end="23" type="bool"/>
+ </struct>
+
<struct name="EVENT_PIXEL_PDS_CODE" length="1">
<field name="addr" start="4" end="31" shift="4" type="address"/>
</struct>
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<!--
+These definitions are based on the ones for the firmware streams found
+in the fwif header (pvr_rogue_fwif_stream.h).
+
+TODO: Once the kernel driver is merged upstream, check to see if this comment
+needs updating.
+-->
+
+<csbgen name="ROGUE" prefix="FW">
+
+ <define name="STREAM_EXTHDR_DATA_MASK" value="0xFFFFFFF"/>
+
+ <enum name="STREAM_EXTHDR_TYPE_COMPUTE">
+ <value name="0" value="0"/>
+ </enum>
+
+ <enum name="STREAM_EXTHDR_TYPE_GEOM">
+ <value name="0" value="0"/>
+ </enum>
+
+ <enum name="STREAM_EXTHDR_TYPE_FRAG">
+ <value name="0" value="0"/>
+ </enum>
+
+ <struct name="STREAM_EXTHDR_COMPUTE0" length="1">
+ <field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_COMPUTE" default="0"/>
+ <field name="continuation" start="28" end="28" type="bool"/>
+ <field name="has_brn49927" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="STREAM_EXTHDR_FRAG0" length="1">
+ <field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_FRAG" default="0"/>
+ <field name="continuation" start="28" end="28" type="bool"/>
+ <field name="has_brn49927" start="1" end="1" type="bool"/>
+ <field name="has_brn47217" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="STREAM_EXTHDR_GEOM0" length="1">
+ <field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_GEOM" default="0"/>
+ <field name="continuation" start="28" end="28" type="bool"/>
+ <field name="has_brn49927" start="0" end="0" type="bool"/>
+ </struct>
+
+</csbgen>
#include "rogue_cdm.h"
#include "rogue_cr.h"
+#include "rogue_fw.h"
#include "rogue_ipf.h"
#include "rogue_lls.h"
#include "rogue_pbestate.h"
}
static inline uint32_t
-rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info *dev_info)
-{
- /* Default value based on the minimum value found in all existing cores. */
- uint32_t max_usc_tasks = PVR_GET_FEATURE_VALUE(dev_info, max_usc_tasks, 24U);
-
- /* FIXME: Where does the 9 come from? */
- return max_usc_tasks - 9;
-}
-
-static inline uint32_t
rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
{
if (PVR_HAS_FEATURE(dev_info, eight_output_registers))
#include "pvr_winsys.h"
#include "util/macros.h"
-static void pvr_compute_job_ws_submit_info_init(
- struct pvr_compute_ctx *ctx,
- struct pvr_sub_cmd_compute *sub_cmd,
- struct vk_sync *barrier,
- struct vk_sync **waits,
- uint32_t wait_count,
- uint32_t *stage_flags,
- struct pvr_winsys_compute_submit_info *submit_info)
+static void
+pvr_submit_info_stream_init(struct pvr_compute_ctx *ctx,
+ struct pvr_sub_cmd_compute *sub_cmd,
+ struct pvr_winsys_compute_submit_info *submit_info)
{
- const struct pvr_device *const device = ctx->device;
- const struct pvr_physical_device *const pdevice = device->pdevice;
+ const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
const struct pvr_device_runtime_info *const dev_runtime_info =
&pdevice->dev_runtime_info;
const struct pvr_device_info *const dev_info = &pdevice->dev_info;
const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
- uint32_t shared_regs = sub_cmd->num_shared_regs;
-
- submit_info->frame_num = device->global_queue_present_count;
- submit_info->job_num = device->global_queue_job_count;
-
- submit_info->barrier = barrier;
-
- submit_info->waits = waits;
- submit_info->wait_count = wait_count;
- submit_info->stage_flags = stage_flags;
- pvr_csb_pack (&submit_info->regs.cdm_ctrl_stream_base,
- CR_CDM_CTRL_STREAM_BASE,
- value) {
- value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
- }
+ uint32_t *stream_ptr = (uint32_t *)submit_info->fw_stream;
- /* FIXME: Need to set up the border color table at device creation
- * time. Set to invalid for the time being.
+ /* FIXME: Need to set up the border color table at device creation time. Set
+ * to invalid for the time being.
*/
- pvr_csb_pack (&submit_info->regs.tpu_border_colour_table,
+ pvr_csb_pack ((uint64_t *)stream_ptr,
CR_TPU_BORDER_COLOUR_TABLE_CDM,
value) {
value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
}
+ stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM);
- if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
- submit_info->regs.cdm_item = 0;
-
- pvr_csb_pack (&submit_info->regs.tpu, CR_TPU, value) {
- value.tag_cem_4k_face_packing = true;
- }
-
- pvr_csb_pack (&submit_info->regs.compute_cluster,
- CR_COMPUTE_CLUSTER,
- value) {
- if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
- PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
- dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
- /* Each phantom has its own MCU, so atomicity can only be guaranteed
- * when all work items are processed on the same phantom. This means we
- * need to disable all USCs other than those of the first phantom,
- * which has 4 clusters.
- */
- value.mask = 0xFU;
- } else {
- value.mask = 0U;
- }
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CTRL_STREAM_BASE, value) {
+ value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
}
+ stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE);
- pvr_csb_pack (&submit_info->regs.cdm_ctx_state_base_addr,
- CR_CDM_CONTEXT_STATE_BASE,
- state) {
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CONTEXT_STATE_BASE, state) {
state.addr = ctx_switch->compute_state_bo->vma->dev_addr;
}
+ stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE);
- pvr_csb_pack (&submit_info->regs.cdm_resume_pds1,
- CR_CDM_CONTEXT_PDS1,
- state) {
+ pvr_csb_pack (stream_ptr, CR_CDM_CONTEXT_PDS1, state) {
/* Convert the data size from dwords to bytes. */
const uint32_t load_program_data_size =
ctx_switch->sr[0].pds.load_program.data_size * 4U;
state.unified_size = ctx_switch->sr[0].usc.unified_size;
state.common_shared = true;
state.common_size =
- DIV_ROUND_UP(shared_regs << 2,
+ DIV_ROUND_UP(sub_cmd->num_shared_regs << 2,
PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE));
state.temp_size = 0;
load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE);
state.fence = false;
}
+ stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1);
+
+ if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) {
+ pvr_csb_pack (stream_ptr, CR_CDM_ITEM, value) {
+ value.mode = 0;
+ }
+ stream_ptr += pvr_cmd_length(CR_CDM_ITEM);
+ }
+
+ if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
+ pvr_csb_pack (stream_ptr, CR_COMPUTE_CLUSTER, value) {
+ if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
+ dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
+ /* Each phantom has its own MCU, so atomicity can only be
+ * guaranteed when all work items are processed on the same
+ * phantom. This means we need to disable all USCs other than
+ * those of the first phantom, which has 4 clusters.
+ */
+ value.mask = 0xFU;
+ } else {
+ value.mask = 0U;
+ }
+ }
+ stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER);
+ }
+
+ if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+ pvr_finishme(
+ "Emit execute_count when feature gpu_multicore_support is present");
+ *stream_ptr = 0;
+ stream_ptr++;
+ }
+
+ submit_info->fw_stream_len = (uint8_t *)stream_ptr - submit_info->fw_stream;
+ assert(submit_info->fw_stream_len <= ARRAY_SIZE(submit_info->fw_stream));
+}
+
+static void pvr_submit_info_ext_stream_init(
+ struct pvr_compute_ctx *ctx,
+ struct pvr_winsys_compute_submit_info *submit_info)
+{
+ const struct pvr_device_info *const dev_info =
+ &ctx->device->pdevice->dev_info;
+
+ uint32_t *ext_stream_ptr = (uint32_t *)submit_info->fw_ext_stream;
+ uint32_t *header0_ptr;
+
+ header0_ptr = ext_stream_ptr;
+ ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_COMPUTE0);
+
+ pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_COMPUTE0, header0) {
+ if (PVR_HAS_QUIRK(dev_info, 49927)) {
+ header0.has_brn49927 = true;
+
+ pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
+ value.tag_cem_4k_face_packing = true;
+ }
+ ext_stream_ptr += pvr_cmd_length(CR_TPU);
+ }
+ }
+
+ submit_info->fw_ext_stream_len =
+ (uint8_t *)ext_stream_ptr - submit_info->fw_ext_stream;
+ assert(submit_info->fw_ext_stream_len <=
+ ARRAY_SIZE(submit_info->fw_ext_stream));
+
+ if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
+ submit_info->fw_ext_stream_len = 0;
+}
+
+static void pvr_compute_job_ws_submit_info_init(
+ struct pvr_compute_ctx *ctx,
+ struct pvr_sub_cmd_compute *sub_cmd,
+ struct vk_sync *barrier,
+ struct vk_sync **waits,
+ uint32_t wait_count,
+ uint32_t *stage_flags,
+ struct pvr_winsys_compute_submit_info *submit_info)
+{
+ const struct pvr_device *const device = ctx->device;
+ const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
+
+ submit_info->frame_num = device->global_queue_present_count;
+ submit_info->job_num = device->global_queue_job_count;
+
+ submit_info->barrier = barrier;
+
+ submit_info->waits = waits;
+ submit_info->wait_count = wait_count;
+ submit_info->stage_flags = stage_flags;
+
+ pvr_submit_info_stream_init(ctx, sub_cmd, submit_info);
+ pvr_submit_info_ext_stream_init(ctx, submit_info);
if (sub_cmd->uses_barrier)
submit_info->flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
return device->ws->ops->compute_submit(ctx->ws_ctx,
&submit_info,
+ &device->pdevice->dev_info,
signal_sync);
}
vk_free(&device->vk.alloc, rt_dataset);
}
-static void
-pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
- struct pvr_render_job *job,
- struct pvr_winsys_geometry_state *state)
+static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx,
+ struct pvr_render_job *job,
+ struct pvr_winsys_geometry_state *state)
{
const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
- /* FIXME: Should this just be done unconditionally? The firmware will just
- * ignore the value anyway.
- */
- if (PVR_HAS_QUIRK(dev_info, 56279)) {
- pvr_csb_pack (&state->regs.pds_ctrl, CR_PDS_CTRL, value) {
- value.max_num_vdm_tasks = rogue_get_max_num_vdm_pds_tasks(dev_info);
- }
- } else {
- state->regs.pds_ctrl = 0;
+ uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
+
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_VDM_CTRL_STREAM_BASE, value) {
+ value.addr = job->ctrl_stream_addr;
}
+ stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE);
- pvr_csb_pack (&state->regs.ppp_ctrl, CR_PPP_CTRL, value) {
+ pvr_csb_pack ((uint64_t *)stream_ptr,
+ CR_TPU_BORDER_COLOUR_TABLE_VDM,
+ value) {
+ value.border_colour_table_address = job->border_colour_table_addr;
+ }
+ stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM);
+
+ pvr_csb_pack (stream_ptr, CR_PPP_CTRL, value) {
value.wclampen = true;
value.fixed_point_format = 1;
}
+ stream_ptr += pvr_cmd_length(CR_PPP_CTRL);
- pvr_csb_pack (&state->regs.te_psg, CR_TE_PSG, value) {
+ pvr_csb_pack (stream_ptr, CR_TE_PSG, value) {
value.completeonterminate = job->geometry_terminate;
value.region_stride = job->rt_dataset->rgn_headers_stride /
value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942);
}
-
- /* The set up of CR_TPU must be identical to
- * pvr_render_job_ws_fragment_state_init().
- */
- pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
- value.tag_cem_4k_face_packing = true;
- }
-
- pvr_csb_pack (&state->regs.tpu_border_colour_table,
- CR_TPU_BORDER_COLOUR_TABLE_VDM,
- value) {
- value.border_colour_table_address = job->border_colour_table_addr;
- }
-
- pvr_csb_pack (&state->regs.vdm_ctrl_stream_base,
- CR_VDM_CTRL_STREAM_BASE,
- value) {
- value.addr = job->ctrl_stream_addr;
- }
+ stream_ptr += pvr_cmd_length(CR_TE_PSG);
/* Set up the USC common size for the context switch resume/load program
* (ctx->ctx_switch.programs[i].sr->pds_load_program), which was created
* as part of the render context.
*/
- pvr_csb_pack (&state->regs.vdm_ctx_resume_task0_size,
- VDMCTRL_PDS_STATE0,
- value) {
+ pvr_csb_pack (stream_ptr, VDMCTRL_PDS_STATE0, value) {
/* Calculate the size in bytes. */
const uint16_t shared_registers_size = job->max_shared_registers * 4;
value.usc_common_size =
DIV_ROUND_UP(shared_registers_size,
PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
- };
+ }
+ stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);
+
+ /* Set up view_idx to 0 */
+ *stream_ptr = 0;
+ stream_ptr++;
+
+ state->fw_stream_len = (uint8_t *)stream_ptr - state->fw_stream;
+ assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
+}
+
+static void
+pvr_geom_state_stream_ext_init(struct pvr_render_ctx *ctx,
+ struct pvr_render_job *job,
+ struct pvr_winsys_geometry_state *state)
+{
+ const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+
+ uint32_t *ext_stream_ptr = (uint32_t *)state->fw_ext_stream;
+ uint32_t *header0_ptr;
+
+ header0_ptr = ext_stream_ptr;
+ ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0);
+
+ pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_GEOM0, header0) {
+ if (PVR_HAS_QUIRK(dev_info, 49927)) {
+ header0.has_brn49927 = true;
+
+ /* The set up of CR_TPU must be identical to
+       * pvr_frag_state_stream_ext_init().
+ */
+ pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
+ value.tag_cem_4k_face_packing = true;
+ }
+ ext_stream_ptr += pvr_cmd_length(CR_TPU);
+ }
+ }
+
+ state->fw_ext_stream_len = (uint8_t *)ext_stream_ptr - state->fw_ext_stream;
+ assert(state->fw_ext_stream_len <= ARRAY_SIZE(state->fw_ext_stream));
+
+ if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
+ state->fw_ext_stream_len = 0;
+}
+
+static void
+pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
+ struct pvr_render_job *job,
+ struct pvr_winsys_geometry_state *state)
+{
+ pvr_geom_state_stream_init(ctx, job, state);
+ pvr_geom_state_stream_ext_init(ctx, job, state);
state->flags = 0;
}
}
-static void
-pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
- struct pvr_render_job *job,
- struct pvr_winsys_fragment_state *state)
+static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
+ struct pvr_render_job *job,
+ struct pvr_winsys_fragment_state *state)
{
+ const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
+ const struct pvr_device_runtime_info *dev_runtime_info =
+ &pdevice->dev_runtime_info;
+ const struct pvr_device_info *dev_info = &pdevice->dev_info;
const enum PVRX(CR_ISP_AA_MODE_TYPE)
isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples);
- const struct pvr_device_runtime_info *dev_runtime_info =
- &ctx->device->pdevice->dev_runtime_info;
- const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
- uint32_t isp_ctl;
- /* FIXME: what to do when job->run_frag is false? */
+ uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
+ uint32_t pixel_ctl;
+ uint32_t isp_ctl;
/* FIXME: pass in the number of samples rather than isp_aa_mode? */
pvr_setup_tiles_in_flight(dev_info,
false,
job->max_tiles_in_flight,
&isp_ctl,
- &state->regs.usc_pixel_output_ctrl);
+ &pixel_ctl);
- pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, value) {
- value.sample_pos = true;
-
- /* FIXME: There are a number of things that cause this to be set, this
- * is just one of them.
- */
- value.process_empty_tiles = job->process_empty_tiles;
- }
-
- /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
- * possible to fully pack CR_ISP_CTL above rather than having to OR in part
- * of the value.
- */
- state->regs.isp_ctl |= isp_ctl;
-
- pvr_csb_pack (&state->regs.isp_aa, CR_ISP_AA, value) {
- value.mode = isp_aa_mode;
- }
-
- /* The set up of CR_TPU must be identical to
- * pvr_render_job_ws_geometry_state_init().
- */
- pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
- value.tag_cem_4k_face_packing = true;
- }
-
- if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
- PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
- dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
- /* Each phantom has its own MCU, so atomicity can only be guaranteed
- * when all work items are processed on the same phantom. This means we
- * need to disable all USCs other than those of the first phantom, which
- * has 4 clusters. Note that we only need to do this for atomic
- * operations in fragment shaders, since hardware prevents the TA to run
- * on more than one phantom anyway.
- */
- state->regs.pixel_phantom = 0xF;
- } else {
- state->regs.pixel_phantom = 0;
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_SCISSOR_BASE, value) {
+ value.addr = job->scissor_table_addr;
}
+ stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
- pvr_csb_pack (&state->regs.isp_bgobjvals, CR_ISP_BGOBJVALS, value) {
- value.enablebgtag = job->enable_bg_tag;
-
- value.mask = true;
-
- /* FIXME: Hard code this for now as we don't currently support any
- * stencil image formats.
- */
- value.stencil = 0xFF;
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_DBIAS_BASE, value) {
+ value.addr = job->depth_bias_table_addr;
}
+ stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE);
- pvr_csb_pack (&state->regs.isp_bgobjdepth, CR_ISP_BGOBJDEPTH, value) {
- /* FIXME: This is suitable for the single depth format the driver
- * currently supports, but may need updating to handle other depth
- * formats.
- */
- value.value = fui(job->depth_clear_value);
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_OCLQRY_BASE, value) {
+ value.addr = PVR_DEV_ADDR_INVALID;
}
+ stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
/* FIXME: Some additional set up needed to support depth and stencil
* load/store operations.
*/
- pvr_csb_pack (&state->regs.isp_zlsctl, CR_ISP_ZLSCTL, value) {
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
uint32_t aligned_width =
ALIGN_POT(job->depth_physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
uint32_t aligned_height =
value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_F32Z);
value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
}
+ stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
- if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
- pvr_csb_pack (&state->regs.isp_zls_pixels, CR_ISP_ZLS_PIXELS, value) {
- value.x = job->depth_stride - 1;
- value.y = job->depth_height - 1;
- }
- } else {
- state->regs.isp_zls_pixels = 0;
- }
-
- pvr_csb_pack (&state->regs.isp_zload_store_base, CR_ISP_ZLOAD_BASE, value) {
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLOAD_BASE, value) {
value.addr = job->depth_addr;
}
+ stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
- pvr_csb_pack (&state->regs.isp_stencil_load_store_base,
- CR_ISP_STENCIL_LOAD_BASE,
- value) {
+ pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_STENCIL_LOAD_BASE, value) {
value.addr = job->stencil_addr;
/* FIXME: May need to set value.enable to true. */
}
+ stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
+
+ *(uint64_t *)stream_ptr = 0;
+ stream_ptr += 2U;
+
+ STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == 8U);
+ STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) == 3U);
+ STATIC_ASSERT(sizeof(job->pbe_reg_words[0][0]) == sizeof(uint64_t));
+ memcpy(stream_ptr, job->pbe_reg_words, sizeof(job->pbe_reg_words));
+ stream_ptr += 8U * 3U * 2U;
- pvr_csb_pack (&state->regs.tpu_border_colour_table,
+ pvr_csb_pack ((uint64_t *)stream_ptr,
CR_TPU_BORDER_COLOUR_TABLE_PDM,
value) {
value.border_colour_table_address = job->border_colour_table_addr;
}
+ stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
- state->regs.isp_oclqry_base = 0;
+ STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) == 3U);
+ STATIC_ASSERT(sizeof(job->pds_bgnd_reg_values[0]) == sizeof(uint64_t));
+ memcpy(stream_ptr,
+ job->pds_bgnd_reg_values,
+ sizeof(job->pds_bgnd_reg_values));
+ stream_ptr += 3U * 2U;
- pvr_csb_pack (&state->regs.isp_dbias_base, CR_ISP_DBIAS_BASE, value) {
- value.addr = job->depth_bias_table_addr;
+ /* Set pds_pr_bgnd array to 0 */
+ memset(stream_ptr, 0, 3U * sizeof(uint64_t));
+ stream_ptr += 3U * 2U;
+
+ /* Set usc_clear_register array to 0 */
+ memset(stream_ptr, 0, 8U * sizeof(uint32_t));
+ stream_ptr += 8U;
+
+ *stream_ptr = pixel_ctl;
+ stream_ptr++;
+
+ pvr_csb_pack (stream_ptr, CR_ISP_BGOBJDEPTH, value) {
+ /* FIXME: This is suitable for the single depth format the driver
+ * currently supports, but may need updating to handle other depth
+ * formats.
+ */
+ value.value = fui(job->depth_clear_value);
}
+ stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
- pvr_csb_pack (&state->regs.isp_scissor_base, CR_ISP_SCISSOR_BASE, value) {
- value.addr = job->scissor_table_addr;
+ pvr_csb_pack (stream_ptr, CR_ISP_BGOBJVALS, value) {
+ value.enablebgtag = job->enable_bg_tag;
+
+ value.mask = true;
+
+ /* FIXME: Hard code this for now as we don't currently support any
+ * stencil image formats.
+ */
+ value.stencil = 0xFF;
}
+ stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
- pvr_csb_pack (&state->regs.event_pixel_pds_info,
- CR_EVENT_PIXEL_PDS_INFO,
- value) {
+ pvr_csb_pack (stream_ptr, CR_ISP_AA, value) {
+ value.mode = isp_aa_mode;
+ }
+ stream_ptr += pvr_cmd_length(CR_ISP_AA);
+
+ pvr_csb_pack (stream_ptr, CR_ISP_CTL, value) {
+ value.sample_pos = true;
+
+ /* FIXME: There are a number of things that cause this to be set, this
+ * is just one of them.
+ */
+ value.process_empty_tiles = job->process_empty_tiles;
+ }
+ /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
+ * possible to fully pack CR_ISP_CTL above rather than having to OR in part
+ * of the value.
+ */
+ *stream_ptr |= isp_ctl;
+ stream_ptr += pvr_cmd_length(CR_ISP_CTL);
+
+ pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_INFO, value) {
value.const_size =
DIV_ROUND_UP(ctx->device->pixel_event_data_size_in_dwords,
PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE));
DIV_ROUND_UP(PVR_STATE_PBE_DWORDS,
PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE));
}
+ stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
+
+ if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
+ uint32_t pixel_phantom = 0;
+
+ if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
+ dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
+ /* Each phantom has its own MCU, so atomicity can only be guaranteed
+ * when all work items are processed on the same phantom. This means
+ * we need to disable all USCs other than those of the first
+ * phantom, which has 4 clusters. Note that we only need to do this
+ * for atomic operations in fragment shaders, since hardware
+ * prevents the TA to run on more than one phantom anyway.
+ */
+ pixel_phantom = 0xF;
+ }
- pvr_csb_pack (&state->regs.event_pixel_pds_data,
- CR_EVENT_PIXEL_PDS_DATA,
- value) {
+ *stream_ptr = pixel_phantom;
+ stream_ptr++;
+ }
+
+ /* Set up view_idx to 0 */
+ *stream_ptr = 0;
+ stream_ptr++;
+
+ pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset);
}
+ stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
- STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word) ==
- ARRAY_SIZE(job->pbe_reg_words));
- STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word[0]) ==
- ARRAY_SIZE(job->pbe_reg_words[0]));
+ if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+ pvr_finishme(
+ "Emit isp_oclqry_stride when feature gpu_multicore_support is present");
+ *stream_ptr = 0;
+ stream_ptr++;
+ }
- for (uint32_t i = 0; i < ARRAY_SIZE(job->pbe_reg_words); i++) {
- state->regs.pbe_word[i][0] = job->pbe_reg_words[i][0];
- state->regs.pbe_word[i][1] = job->pbe_reg_words[i][1];
- state->regs.pbe_word[i][2] = job->pbe_reg_words[i][2];
+ if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
+ pvr_csb_pack (stream_ptr, CR_ISP_ZLS_PIXELS, value) {
+ value.x = job->depth_stride - 1;
+ value.y = job->depth_height - 1;
+ }
+ stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
}
- STATIC_ASSERT(__same_type(state->regs.pds_bgnd, job->pds_bgnd_reg_values));
- typed_memcpy(state->regs.pds_bgnd,
- job->pds_bgnd_reg_values,
- ARRAY_SIZE(state->regs.pds_bgnd));
+ /* zls_stride */
+ *stream_ptr = job->depth_layer_size;
+ stream_ptr++;
+
+ /* sls_stride */
+ *stream_ptr = job->depth_layer_size;
+ stream_ptr++;
- memset(state->regs.pds_pr_bgnd, 0, sizeof(state->regs.pds_pr_bgnd));
+ if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+ pvr_finishme(
+ "Emit execute_count when feature gpu_multicore_support is present");
+ *stream_ptr = 0;
+ stream_ptr++;
+ }
+
+ state->fw_stream_len = (uint8_t *)stream_ptr - state->fw_stream;
+ assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
+}
+
+static void
+pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx,
+ struct pvr_render_job *job,
+ struct pvr_winsys_fragment_state *state)
+{
+ const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+
+ uint32_t *ext_stream_ptr = (uint32_t *)state->fw_ext_stream;
+ uint32_t *header0_ptr;
+
+ header0_ptr = ext_stream_ptr;
+ ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_FRAG0);
+
+ pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_FRAG0, header0) {
+ if (PVR_HAS_QUIRK(dev_info, 49927)) {
+ header0.has_brn49927 = true;
+
+ /* The set up of CR_TPU must be identical to
+       * pvr_geom_state_stream_ext_init().
+ */
+ pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
+ value.tag_cem_4k_face_packing = true;
+ }
+ ext_stream_ptr += pvr_cmd_length(CR_TPU);
+ }
+ }
+
+ state->fw_ext_stream_len = (uint8_t *)ext_stream_ptr - state->fw_ext_stream;
+ assert(state->fw_ext_stream_len <= ARRAY_SIZE(state->fw_ext_stream));
+
+ if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
+ state->fw_ext_stream_len = 0;
+}
+
+static void
+pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
+ struct pvr_render_job *job,
+ struct pvr_winsys_fragment_state *state)
+{
+ /* FIXME: what to do when job->run_frag is false? */
+
+ pvr_frag_state_stream_init(ctx, job, state);
+ pvr_frag_state_stream_ext_init(ctx, job, state);
- /* FIXME: Merge geometry and fragment flags into a single flags member? */
/* FIXME: move to its own function? */
state->flags = 0;
if (job->frag_uses_atomic_ops)
state->flags |= PVR_WINSYS_FRAG_FLAG_SINGLE_CORE;
-
- state->zls_stride = job->depth_layer_size;
- state->sls_stride = job->depth_layer_size;
}
static void pvr_render_job_ws_submit_info_init(
pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry);
pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment);
-
- /* These values are expected to match. */
- assert(submit_info->geometry.regs.tpu == submit_info->fragment.regs.tpu);
}
VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
result = device->ws->ops->render_submit(ctx->ws_ctx,
&submit_info,
+ &device->pdevice->dev_info,
signal_sync_geom,
signal_sync_frag);
if (result != VK_SUCCESS)
uint32_t wait_count;
uint32_t *stage_flags;
- struct {
- uint64_t tpu_border_colour_table;
- uint64_t cdm_ctrl_stream_base;
- uint64_t cdm_ctx_state_base_addr;
- uint32_t tpu;
- uint32_t cdm_resume_pds1;
- uint32_t cdm_item;
- uint32_t compute_cluster;
- } regs;
+ /* Firmware stream buffer. This is the maximum possible size taking into
+ * consideration all HW features.
+ */
+ uint8_t fw_stream[92];
+ uint32_t fw_stream_len;
+
+ /* Firmware extension stream buffer. This is the maximum possible size taking
+    * into consideration all quirks and enhancements.
+ */
+ uint8_t fw_ext_stream[8];
+ uint32_t fw_ext_stream_len;
/* Must be 0 or a combination of PVR_WINSYS_COMPUTE_FLAG_* flags. */
uint32_t flags;
uint32_t *stage_flags;
struct pvr_winsys_geometry_state {
- struct {
- uint64_t pds_ctrl;
- uint32_t ppp_ctrl;
- uint32_t te_psg;
- uint32_t tpu;
- uint64_t tpu_border_colour_table;
- uint64_t vdm_ctrl_stream_base;
- uint32_t vdm_ctx_resume_task0_size;
- } regs;
+ /* Firmware stream buffer. This is the maximum possible size taking into
+ * consideration all HW features.
+ */
+ uint8_t fw_stream[52];
+ uint32_t fw_stream_len;
+
+ /* Firmware extension stream buffer. This is the maximum possible size
+       * taking into consideration all quirks and enhancements.
+ */
+ uint8_t fw_ext_stream[12];
+ uint32_t fw_ext_stream_len;
/* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */
uint32_t flags;
} geometry;
struct pvr_winsys_fragment_state {
- struct {
- uint32_t event_pixel_pds_data;
- uint32_t event_pixel_pds_info;
- uint32_t isp_aa;
- uint32_t isp_bgobjdepth;
- uint32_t isp_bgobjvals;
- uint32_t isp_ctl;
- uint64_t isp_dbias_base;
- uint64_t isp_oclqry_base;
- uint64_t isp_scissor_base;
- uint64_t isp_stencil_load_store_base;
- uint64_t isp_zload_store_base;
- uint64_t isp_zlsctl;
- uint32_t isp_zls_pixels;
- uint64_t pbe_word[PVR_MAX_COLOR_ATTACHMENTS]
- [ROGUE_NUM_PBESTATE_REG_WORDS];
- uint32_t pixel_phantom;
- uint64_t pds_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
- uint64_t pds_pr_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
- uint32_t tpu;
- uint64_t tpu_border_colour_table;
- uint32_t usc_pixel_output_ctrl;
- } regs;
+ /* Firmware stream buffer. This is the maximum possible size taking into
+ * consideration all HW features.
+ */
+ uint8_t fw_stream[432];
+ uint32_t fw_stream_len;
+
+ /* Firmware extension stream buffer. This is the maximum possible size
+       * taking into consideration all quirks and enhancements.
+ */
+ uint8_t fw_ext_stream[8];
+ uint32_t fw_ext_stream_len;
/* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. */
uint32_t flags;
- uint32_t zls_stride;
- uint32_t sls_stride;
} fragment;
};
VkResult (*render_submit)(
const struct pvr_winsys_render_ctx *ctx,
const struct pvr_winsys_render_submit_info *submit_info,
+ const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync_geom,
struct vk_sync *signal_sync_frag);
VkResult (*compute_submit)(
const struct pvr_winsys_compute_ctx *ctx,
const struct pvr_winsys_compute_submit_info *submit_info,
+ const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync);
VkResult (*transfer_ctx_create)(
/* Only used when feature VDM_OBJECT_LEVEL_LLS present. */
uint32_t vdm_context_resume_task3_size;
- /* Only used when BRN 56279 or BRN 67381 present. */
+ /* Only used when BRN 67381 present. */
uint32_t pds_ctrl;
uint32_t view_idx;
*/
struct rogue_fwif_cmd_ta_3d_shared cmd_shared;
- struct rogue_fwif_ta_regs ALIGN_ATTR(8) geom_regs;
+ struct rogue_fwif_ta_regs ALIGN_ATTR(8) regs;
uint32_t ALIGN_ATTR(8) flags;
/**
* Holds the TA/3D fence value to allow the 3D partial render command
#include "fw-api/pvr_rogue_fwif.h"
#include "fw-api/pvr_rogue_fwif_rf.h"
+#include "pvr_device_info.h"
#include "pvr_private.h"
#include "pvr_srv.h"
#include "pvr_srv_bridge.h"
vk_free(srv_ws->alloc, srv_ctx);
}
+static void
+pvr_srv_compute_cmd_stream_load(struct rogue_fwif_cmd_compute *const cmd,
+ const uint8_t *const stream,
+ const uint32_t stream_len,
+ const struct pvr_device_info *const dev_info)
+{
+ const uint32_t *stream_ptr = (const uint32_t *)stream;
+ struct rogue_fwif_cdm_regs *const regs = &cmd->regs;
+
+ regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
+ stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM);
+
+ regs->cdm_ctrl_stream_base = *(const uint64_t *)stream_ptr;
+ stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE);
+
+ regs->cdm_context_state_base_addr = *(const uint64_t *)stream_ptr;
+ stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE);
+
+ regs->cdm_resume_pds1 = *stream_ptr;
+ stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1);
+
+ regs->cdm_item = *stream_ptr;
+ stream_ptr += pvr_cmd_length(CR_CDM_ITEM);
+
+ if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
+ regs->compute_cluster = *stream_ptr;
+ stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER);
+ }
+
+ if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+ cmd->execute_count = *stream_ptr;
+ stream_ptr++;
+ }
+
+ assert((const uint8_t *)stream_ptr - stream == stream_len);
+}
+
+/* Unpack the optional firmware extension stream of a compute job.
+ *
+ * The extension stream carries values needed only on devices with certain
+ * quirks/enhancements; a packed header word describes which entries follow.
+ */
+static void pvr_srv_compute_cmd_ext_stream_load(
+   struct rogue_fwif_cmd_compute *const cmd,
+   const uint8_t *const ext_stream,
+   const uint32_t ext_stream_len,
+   const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
+   struct rogue_fwif_cdm_regs *const regs = &cmd->regs;
+
+   struct PVRX(FW_STREAM_EXTHDR_COMPUTE0) header0;
+
+   header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_COMPUTE0);
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_COMPUTE0);
+
+   /* The header must agree with the device info used when packing. */
+   assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
+   if (header0.has_brn49927) {
+      regs->tpu = *ext_stream_ptr;
+      ext_stream_ptr += pvr_cmd_length(CR_TPU);
+   }
+
+   /* Verify we consumed exactly the stream the driver packed. */
+   assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
+}
+
static void pvr_srv_compute_cmd_init(
const struct pvr_winsys_compute_submit_info *submit_info,
- struct rogue_fwif_cmd_compute *cmd)
+ struct rogue_fwif_cmd_compute *cmd,
+ const struct pvr_device_info *const dev_info)
{
- struct rogue_fwif_cdm_regs *fw_regs = &cmd->regs;
-
memset(cmd, 0, sizeof(*cmd));
cmd->cmn.frame_num = submit_info->frame_num;
- fw_regs->tpu_border_colour_table = submit_info->regs.tpu_border_colour_table;
- fw_regs->cdm_item = submit_info->regs.cdm_item;
- fw_regs->compute_cluster = submit_info->regs.compute_cluster;
- fw_regs->cdm_ctrl_stream_base = submit_info->regs.cdm_ctrl_stream_base;
- fw_regs->cdm_context_state_base_addr =
- submit_info->regs.cdm_ctx_state_base_addr;
- fw_regs->tpu = submit_info->regs.tpu;
- fw_regs->cdm_resume_pds1 = submit_info->regs.cdm_resume_pds1;
+ pvr_srv_compute_cmd_stream_load(cmd,
+ submit_info->fw_stream,
+ submit_info->fw_stream_len,
+ dev_info);
+
+ if (submit_info->fw_ext_stream_len) {
+ pvr_srv_compute_cmd_ext_stream_load(cmd,
+ submit_info->fw_ext_stream,
+ submit_info->fw_ext_stream_len,
+ dev_info);
+ }
if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP)
cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
VkResult pvr_srv_winsys_compute_submit(
const struct pvr_winsys_compute_ctx *ctx,
const struct pvr_winsys_compute_submit_info *submit_info,
+ const struct pvr_device_info *const dev_info,
struct vk_sync *signal_sync)
{
const struct pvr_srv_winsys_compute_ctx *srv_ctx =
int in_fd = -1;
int fence;
- pvr_srv_compute_cmd_init(submit_info, &compute_cmd);
+ pvr_srv_compute_cmd_init(submit_info, &compute_cmd, dev_info);
for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);
#include <vulkan/vulkan.h>
+struct pvr_device_info;
struct pvr_winsys;
struct pvr_winsys_compute_ctx;
struct pvr_winsys_compute_ctx_create_info;
VkResult pvr_srv_winsys_compute_submit(
const struct pvr_winsys_compute_ctx *ctx,
const struct pvr_winsys_compute_submit_info *submit_info,
+ const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync);
#endif /* PVR_SRV_JOB_COMPUTE_H */
vk_free(srv_ws->alloc, srv_ctx);
}
+/* Unpack the packed firmware command stream of a geometry (TA) job into
+ * the rogue_fwif_ta_regs block consumed by the firmware.
+ *
+ * The read order below must match the order in which the driver packed the
+ * stream; stream_len is used only to assert the whole stream was consumed.
+ */
+static void
+pvr_srv_geometry_cmd_stream_load(struct rogue_fwif_cmd_ta *const cmd,
+                                 const uint8_t *const stream,
+                                 const uint32_t stream_len,
+                                 const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *stream_ptr = (const uint32_t *)stream;
+   struct rogue_fwif_ta_regs *const regs = &cmd->regs;
+
+   /* NOTE(review): the 64-bit loads below assume the stream buffer is
+    * suitably aligned for uint64_t — confirm at the packing side.
+    */
+   regs->vdm_ctrl_stream_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE);
+
+   regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM);
+
+   regs->ppp_ctrl = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_PPP_CTRL);
+
+   regs->te_psg = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_TE_PSG);
+
+   regs->vdm_context_resume_task0_size = *stream_ptr;
+   stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);
+
+   /* view_idx is not a HW register so has no csbgen length; it occupies a
+    * single dword.
+    */
+   regs->view_idx = *stream_ptr;
+   stream_ptr++;
+
+   /* Verify we consumed exactly the stream the driver packed. */
+   assert((const uint8_t *)stream_ptr - stream == stream_len);
+}
+
+/* Unpack the optional firmware extension stream of a geometry (TA) job.
+ *
+ * The extension stream carries values needed only on devices with certain
+ * quirks/enhancements; a packed header word describes which entries follow.
+ */
+static void pvr_srv_geometry_cmd_ext_stream_load(
+   struct rogue_fwif_cmd_ta *const cmd,
+   const uint8_t *const ext_stream,
+   const uint32_t ext_stream_len,
+   const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
+   struct rogue_fwif_ta_regs *const regs = &cmd->regs;
+
+   struct PVRX(FW_STREAM_EXTHDR_GEOM0) header0;
+
+   header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_GEOM0);
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0);
+
+   /* The header must agree with the device info used when packing. */
+   assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
+   if (header0.has_brn49927) {
+      regs->tpu = *ext_stream_ptr;
+      ext_stream_ptr += pvr_cmd_length(CR_TPU);
+   }
+
+   /* Verify we consumed exactly the stream the driver packed. */
+   assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
+}
+
static void pvr_srv_geometry_cmd_init(
const struct pvr_winsys_render_submit_info *submit_info,
const struct pvr_srv_sync_prim *sync_prim,
- struct rogue_fwif_cmd_ta *cmd)
+ struct rogue_fwif_cmd_ta *cmd,
+ const struct pvr_device_info *const dev_info)
{
const struct pvr_winsys_geometry_state *state = &submit_info->geometry;
- struct rogue_fwif_ta_regs *fw_regs = &cmd->geom_regs;
memset(cmd, 0, sizeof(*cmd));
cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
- fw_regs->vdm_ctrl_stream_base = state->regs.vdm_ctrl_stream_base;
- fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
- fw_regs->ppp_ctrl = state->regs.ppp_ctrl;
- fw_regs->te_psg = state->regs.te_psg;
- fw_regs->tpu = state->regs.tpu;
- fw_regs->vdm_context_resume_task0_size =
- state->regs.vdm_ctx_resume_task0_size;
+ pvr_srv_geometry_cmd_stream_load(cmd,
+ state->fw_stream,
+ state->fw_stream_len,
+ dev_info);
- assert(state->regs.pds_ctrl >> 32U == 0U);
- fw_regs->pds_ctrl = (uint32_t)state->regs.pds_ctrl;
+ if (state->fw_ext_stream_len) {
+ pvr_srv_geometry_cmd_ext_stream_load(cmd,
+ state->fw_ext_stream,
+ state->fw_ext_stream_len,
+ dev_info);
+ }
if (state->flags & PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY)
cmd->flags |= ROGUE_FWIF_TAFLAGS_FIRSTKICK;
cmd->partial_render_ta_3d_fence.value = sync_prim->value;
}
+/* Unpack the packed firmware command stream of a fragment (3D) job into the
+ * rogue_fwif_3d_regs block plus the trailing per-command fields (strides,
+ * execute count).
+ *
+ * Layout: the 64-bit values come first, then the 32-bit section; words
+ * gated on a HW feature are only present when the device has that feature,
+ * so packer and unpacker must use the same dev_info. The read order below
+ * must match the packing order exactly; stream_len is used only to assert
+ * the whole stream was consumed.
+ */
+static void
+pvr_srv_fragment_cmd_stream_load(struct rogue_fwif_cmd_3d *const cmd,
+                                 const uint8_t *const stream,
+                                 const uint32_t stream_len,
+                                 const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *stream_ptr = (const uint32_t *)stream;
+   struct rogue_fwif_3d_regs *const regs = &cmd->regs;
+
+   /* NOTE(review): the 64-bit loads below assume the stream buffer is
+    * suitably aligned for uint64_t — confirm at the packing side.
+    */
+   regs->isp_scissor_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
+
+   regs->isp_dbias_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE);
+
+   regs->isp_oclqry_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
+
+   regs->isp_zlsctl = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
+
+   regs->isp_zload_store_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
+
+   regs->isp_stencil_load_store_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
+
+   /* No csbgen definition is used here; the value occupies two dwords. */
+   regs->fb_cdc_zls = *(const uint64_t *)stream_ptr;
+   stream_ptr += 2U;
+
+   /* PBE words: 8 attachments x 3 words, each word 64-bit (2 dwords). */
+   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word) == 8U);
+   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word[0]) == 3U);
+   STATIC_ASSERT(sizeof(regs->pbe_word[0][0]) == sizeof(uint64_t));
+   memcpy(regs->pbe_word, stream_ptr, sizeof(regs->pbe_word));
+   stream_ptr += 8U * 3U * 2U;
+
+   regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
+
+   STATIC_ASSERT(ARRAY_SIZE(regs->pds_bgnd) == 3U);
+   STATIC_ASSERT(sizeof(regs->pds_bgnd[0]) == sizeof(uint64_t));
+   memcpy(regs->pds_bgnd, stream_ptr, sizeof(regs->pds_bgnd));
+   stream_ptr += 3U * 2U;
+
+   STATIC_ASSERT(ARRAY_SIZE(regs->pds_pr_bgnd) == 3U);
+   STATIC_ASSERT(sizeof(regs->pds_pr_bgnd[0]) == sizeof(uint64_t));
+   memcpy(regs->pds_pr_bgnd, stream_ptr, sizeof(regs->pds_pr_bgnd));
+   stream_ptr += 3U * 2U;
+
+   /* 32-bit section starts here. */
+   STATIC_ASSERT(ARRAY_SIZE(regs->usc_clear_register) == 8U);
+   STATIC_ASSERT(sizeof(regs->usc_clear_register[0]) == sizeof(uint32_t));
+   memcpy(regs->usc_clear_register,
+          stream_ptr,
+          sizeof(regs->usc_clear_register));
+   stream_ptr += 8U;
+
+   regs->usc_pixel_output_ctrl = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
+
+   regs->isp_bgobjdepth = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
+
+   regs->isp_bgobjvals = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
+
+   regs->isp_aa = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_AA);
+
+   regs->isp_ctl = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_CTL);
+
+   regs->event_pixel_pds_info = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
+      regs->pixel_phantom = *stream_ptr;
+      stream_ptr++;
+   }
+
+   /* view_idx is not a HW register; it occupies a single dword. */
+   regs->view_idx = *stream_ptr;
+   stream_ptr++;
+
+   regs->event_pixel_pds_data = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
+
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      regs->isp_oclqry_stride = *stream_ptr;
+      stream_ptr++;
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
+      regs->isp_zls_pixels = *stream_ptr;
+      stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
+   }
+
+   /* Per-command fields that live outside the register block. */
+   cmd->zls_stride = *stream_ptr;
+   stream_ptr++;
+
+   cmd->sls_stride = *stream_ptr;
+   stream_ptr++;
+
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      cmd->execute_count = *stream_ptr;
+      stream_ptr++;
+   }
+
+   /* Verify we consumed exactly the stream the driver packed. */
+   assert((const uint8_t *)stream_ptr - stream == stream_len);
+}
+
+/* Unpack the optional firmware extension stream of a fragment (3D) job.
+ *
+ * The extension stream carries values needed only on devices with certain
+ * quirks/enhancements; a packed header word describes which entries follow.
+ */
+static void pvr_srv_fragment_cmd_ext_stream_load(
+   struct rogue_fwif_cmd_3d *const cmd,
+   const uint8_t *const ext_stream,
+   const uint32_t ext_stream_len,
+   const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
+   struct rogue_fwif_3d_regs *const regs = &cmd->regs;
+
+   struct PVRX(FW_STREAM_EXTHDR_FRAG0) header0;
+
+   header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_FRAG0);
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_FRAG0);
+
+   /* The header must agree with the device info used when packing. */
+   assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
+   if (header0.has_brn49927) {
+      regs->tpu = *ext_stream_ptr;
+      ext_stream_ptr += pvr_cmd_length(CR_TPU);
+   }
+
+   /* Verify we consumed exactly the stream the driver packed. */
+   assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
+}
+
static void pvr_srv_fragment_cmd_init(
const struct pvr_winsys_render_submit_info *submit_info,
- struct rogue_fwif_cmd_3d *cmd)
+ struct rogue_fwif_cmd_3d *cmd,
+ const struct pvr_device_info *dev_info)
{
const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
- struct rogue_fwif_3d_regs *fw_regs = &cmd->regs;
memset(cmd, 0, sizeof(*cmd));
cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
- fw_regs->usc_pixel_output_ctrl = state->regs.usc_pixel_output_ctrl;
- fw_regs->isp_bgobjdepth = state->regs.isp_bgobjdepth;
- fw_regs->isp_bgobjvals = state->regs.isp_bgobjvals;
- fw_regs->isp_aa = state->regs.isp_aa;
- fw_regs->isp_ctl = state->regs.isp_ctl;
- fw_regs->tpu = state->regs.tpu;
- fw_regs->event_pixel_pds_info = state->regs.event_pixel_pds_info;
- fw_regs->pixel_phantom = state->regs.pixel_phantom;
- fw_regs->event_pixel_pds_data = state->regs.event_pixel_pds_data;
- fw_regs->isp_scissor_base = state->regs.isp_scissor_base;
- fw_regs->isp_dbias_base = state->regs.isp_dbias_base;
- fw_regs->isp_oclqry_base = state->regs.isp_oclqry_base;
- fw_regs->isp_zlsctl = state->regs.isp_zlsctl;
- fw_regs->isp_zload_store_base = state->regs.isp_zload_store_base;
- fw_regs->isp_stencil_load_store_base =
- state->regs.isp_stencil_load_store_base;
- fw_regs->isp_zls_pixels = state->regs.isp_zls_pixels;
-
- STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word) ==
- ARRAY_SIZE(state->regs.pbe_word));
-
- STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word[0]) <=
- ARRAY_SIZE(state->regs.pbe_word[0]));
-
-#if !defined(NDEBUG)
- /* Depending on the hardware we might have more PBE words than the firmware
- * accepts so check that the extra words are 0.
- */
- if (ARRAY_SIZE(fw_regs->pbe_word[0]) < ARRAY_SIZE(state->regs.pbe_word[0])) {
- /* For each color attachment. */
- for (uint32_t i = 0; i < ARRAY_SIZE(state->regs.pbe_word); i++) {
- /* For each extra PBE word not used by the firmware. */
- for (uint32_t j = ARRAY_SIZE(fw_regs->pbe_word[0]);
- j < ARRAY_SIZE(state->regs.pbe_word[0]);
- j++) {
- assert(state->regs.pbe_word[i][j] == 0);
- }
- }
- }
-#endif
-
- memcpy(fw_regs->pbe_word, state->regs.pbe_word, sizeof(fw_regs->pbe_word));
+ pvr_srv_fragment_cmd_stream_load(cmd,
+ state->fw_stream,
+ state->fw_stream_len,
+ dev_info);
- fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
-
- STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_bgnd) ==
- ARRAY_SIZE(state->regs.pds_bgnd));
- typed_memcpy(fw_regs->pds_bgnd,
- state->regs.pds_bgnd,
- ARRAY_SIZE(fw_regs->pds_bgnd));
-
- STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_pr_bgnd) ==
- ARRAY_SIZE(state->regs.pds_pr_bgnd));
- typed_memcpy(fw_regs->pds_pr_bgnd,
- state->regs.pds_pr_bgnd,
- ARRAY_SIZE(fw_regs->pds_pr_bgnd));
+ if (state->fw_ext_stream_len) {
+ pvr_srv_fragment_cmd_ext_stream_load(cmd,
+ state->fw_ext_stream,
+ state->fw_ext_stream_len,
+ dev_info);
+ }
if (state->flags & PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT)
cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER;
if (state->flags & PVR_WINSYS_FRAG_FLAG_SINGLE_CORE)
cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE;
-
- cmd->zls_stride = state->zls_stride;
- cmd->sls_stride = state->sls_stride;
}
VkResult pvr_srv_winsys_render_submit(
const struct pvr_winsys_render_ctx *ctx,
const struct pvr_winsys_render_submit_info *submit_info,
+ const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync_geom,
struct vk_sync *signal_sync_frag)
{
VkResult result;
- pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd);
- pvr_srv_fragment_cmd_init(submit_info, &frag_cmd);
+ pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
+ pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info);
for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);
VkResult pvr_srv_winsys_render_submit(
const struct pvr_winsys_render_ctx *ctx,
const struct pvr_winsys_render_submit_info *submit_info,
+ const struct pvr_device_info *dev_info,
struct vk_sync *signal_sync_geom,
struct vk_sync *signal_sync_frag);