pvr: Implement new firmware stream interface
author: Matt Coster <matt.coster@imgtec.com>
Wed, 3 Aug 2022 08:52:17 +0000 (09:52 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Fri, 11 Nov 2022 11:34:17 +0000 (11:34 +0000)
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19530>

15 files changed:
src/imagination/common/pvr_device_info.c
src/imagination/common/pvr_device_info.h
src/imagination/csbgen/meson.build
src/imagination/csbgen/rogue_cr.xml
src/imagination/csbgen/rogue_fw.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_hwdefs.h
src/imagination/include/hwdef/rogue_hw_utils.h
src/imagination/vulkan/pvr_job_compute.c
src/imagination/vulkan/pvr_job_render.c
src/imagination/vulkan/winsys/pvr_winsys.h
src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h

index c035467..f26f7d4 100644 (file)
@@ -112,7 +112,6 @@ const struct pvr_device_quirks pvr_device_quirks_4_40_2_51 = {
    .has_brn51764 = true,
    .has_brn52354 = true,
    .has_brn52942 = true,
-   .has_brn56279 = true,
    .has_brn58839 = true,
    .has_brn62269 = true,
    .has_brn66011 = true,
index f168a1c..6b7d193 100644 (file)
@@ -344,7 +344,6 @@ struct pvr_device_quirks {
    bool has_brn51764 : 1;
    bool has_brn52354 : 1;
    bool has_brn52942 : 1;
-   bool has_brn56279 : 1;
    bool has_brn58839 : 1;
    bool has_brn62269 : 1;
    bool has_brn66011 : 1;
index ea6a8ff..2ddbd79 100644 (file)
@@ -22,6 +22,7 @@
 pvr_xml_files = [
   'rogue_cdm.xml',
   'rogue_cr.xml',
+  'rogue_fw.xml',
   'rogue_ipf.xml',
   'rogue_lls.xml',
   'rogue_pbestate.xml',
index 9cfaeaf..3477a77 100644 (file)
@@ -264,8 +264,11 @@ SOFTWARE.
     <field name="mask" start="0" end="31" type="uint"/>
   </struct>
 
-  <struct name="PDS_CTRL" length="2">
-    <field name="sm_overlap_enable" start="55" end="55" type="bool"/>
+  <struct name="CDM_ITEM" length="1">
+    <field name="mode" start="0" end="1" type="uint"/>
+  </struct>
+
+  <struct name="PDS_CTRL0" length="1">
     <condition type="if" check="ROGUEXE"/>
       <condition type="if" check="COMPUTE"/>
         <field name="roguexe_max_num_cdm_tasks" start="24" end="31" type="uint"/>
@@ -285,6 +288,10 @@ SOFTWARE.
     <condition type="endif" check="ROGUEXE"/>
   </struct>
 
+  <struct name="PDS_CTRL1" length="1">
+    <field name="sm_overlap_enable" start="23" end="23" type="bool"/>
+  </struct>
+
   <struct name="EVENT_PIXEL_PDS_CODE" length="1">
     <field name="addr" start="4" end="31" shift="4" type="address"/>
   </struct>
diff --git a/src/imagination/csbgen/rogue_fw.xml b/src/imagination/csbgen/rogue_fw.xml
new file mode 100644 (file)
index 0000000..4fa3254
--- /dev/null
@@ -0,0 +1,69 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<!--
+These definitions are based on the ones for the firmware streams found
+in the fwif header (pvr_rogue_fwif_stream.h).
+
+TODO: Once the kernel driver is merged upstream, check to see if this comment
+needs updating.
+-->
+
+<csbgen name="ROGUE" prefix="FW">
+  <!--
+  Layout of an extension stream header dword, as described by the
+  STREAM_EXTHDR_*0 structs below: bits 29-31 hold the type, bit 28 holds the
+  continuation flag, and the has_brn* flags sit in the low bits.
+  STREAM_EXTHDR_DATA_MASK covers bits 0-27, i.e. everything below the
+  continuation and type fields.
+  -->
+  <define name="STREAM_EXTHDR_DATA_MASK" value="0xFFFFFFF"/>
+  <!-- Each job type currently defines a single extension header type, 0. -->
+  <enum name="STREAM_EXTHDR_TYPE_COMPUTE">
+    <value name="0" value="0"/>
+  </enum>
+
+  <enum name="STREAM_EXTHDR_TYPE_GEOM">
+    <value name="0" value="0"/>
+  </enum>
+
+  <enum name="STREAM_EXTHDR_TYPE_FRAG">
+    <value name="0" value="0"/>
+  </enum>
+
+  <struct name="STREAM_EXTHDR_COMPUTE0" length="1">
+    <field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_COMPUTE" default="0"/>
+    <field name="continuation" start="28" end="28" type="bool"/>
+    <field name="has_brn49927" start="0" end="0" type="bool"/>
+  </struct>
+
+  <struct name="STREAM_EXTHDR_FRAG0" length="1">
+    <field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_FRAG" default="0"/>
+    <field name="continuation" start="28" end="28" type="bool"/>
+    <field name="has_brn49927" start="1" end="1" type="bool"/>
+    <field name="has_brn47217" start="0" end="0" type="bool"/>
+  </struct>
+
+  <struct name="STREAM_EXTHDR_GEOM0" length="1">
+    <field name="type" start="29" end="31" type="STREAM_EXTHDR_TYPE_GEOM" default="0"/>
+    <field name="continuation" start="28" end="28" type="bool"/>
+    <field name="has_brn49927" start="0" end="0" type="bool"/>
+  </struct>
+
+</csbgen>
index 75a3a47..bf4345a 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "rogue_cdm.h"
 #include "rogue_cr.h"
+#include "rogue_fw.h"
 #include "rogue_ipf.h"
 #include "rogue_lls.h"
 #include "rogue_pbestate.h"
index 9118cce..f5d88cb 100644 (file)
@@ -103,16 +103,6 @@ rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info,
 }
 
 static inline uint32_t
-rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info *dev_info)
-{
-   /* Default value based on the minimum value found in all existing cores. */
-   uint32_t max_usc_tasks = PVR_GET_FEATURE_VALUE(dev_info, max_usc_tasks, 24U);
-
-   /* FIXME: Where does the 9 come from? */
-   return max_usc_tasks - 9;
-}
-
-static inline uint32_t
 rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
 {
    if (PVR_HAS_FEATURE(dev_info, eight_output_registers))
index cf29bfc..9a378c9 100644 (file)
 #include "pvr_winsys.h"
 #include "util/macros.h"
 
-static void pvr_compute_job_ws_submit_info_init(
-   struct pvr_compute_ctx *ctx,
-   struct pvr_sub_cmd_compute *sub_cmd,
-   struct vk_sync *barrier,
-   struct vk_sync **waits,
-   uint32_t wait_count,
-   uint32_t *stage_flags,
-   struct pvr_winsys_compute_submit_info *submit_info)
+static void
+pvr_submit_info_stream_init(struct pvr_compute_ctx *ctx,
+                            struct pvr_sub_cmd_compute *sub_cmd,
+                            struct pvr_winsys_compute_submit_info *submit_info)
 {
-   const struct pvr_device *const device = ctx->device;
-   const struct pvr_physical_device *const pdevice = device->pdevice;
+   const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
    const struct pvr_device_runtime_info *const dev_runtime_info =
       &pdevice->dev_runtime_info;
    const struct pvr_device_info *const dev_info = &pdevice->dev_info;
    const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
-   uint32_t shared_regs = sub_cmd->num_shared_regs;
-
-   submit_info->frame_num = device->global_queue_present_count;
-   submit_info->job_num = device->global_queue_job_count;
-
-   submit_info->barrier = barrier;
-
-   submit_info->waits = waits;
-   submit_info->wait_count = wait_count;
-   submit_info->stage_flags = stage_flags;
 
-   pvr_csb_pack (&submit_info->regs.cdm_ctrl_stream_base,
-                 CR_CDM_CTRL_STREAM_BASE,
-                 value) {
-      value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
-   }
+   uint32_t *stream_ptr = (uint32_t *)submit_info->fw_stream;
 
-   /* FIXME: Need to set up the border color table at device creation
-    * time. Set to invalid for the time being.
+   /* FIXME: Need to set up the border color table at device creation time. Set
+    * to invalid for the time being.
     */
-   pvr_csb_pack (&submit_info->regs.tpu_border_colour_table,
+   pvr_csb_pack ((uint64_t *)stream_ptr,
                  CR_TPU_BORDER_COLOUR_TABLE_CDM,
                  value) {
       value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
    }
+   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM);
 
-   if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
-      submit_info->regs.cdm_item = 0;
-
-   pvr_csb_pack (&submit_info->regs.tpu, CR_TPU, value) {
-      value.tag_cem_4k_face_packing = true;
-   }
-
-   pvr_csb_pack (&submit_info->regs.compute_cluster,
-                 CR_COMPUTE_CLUSTER,
-                 value) {
-      if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
-          PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
-          dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
-         /* Each phantom has its own MCU, so atomicity can only be guaranteed
-          * when all work items are processed on the same phantom. This means we
-          * need to disable all USCs other than those of the first phantom,
-          * which has 4 clusters.
-          */
-         value.mask = 0xFU;
-      } else {
-         value.mask = 0U;
-      }
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CTRL_STREAM_BASE, value) {
+      value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
    }
+   stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE);
 
-   pvr_csb_pack (&submit_info->regs.cdm_ctx_state_base_addr,
-                 CR_CDM_CONTEXT_STATE_BASE,
-                 state) {
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CONTEXT_STATE_BASE, state) {
       state.addr = ctx_switch->compute_state_bo->vma->dev_addr;
    }
+   stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE);
 
-   pvr_csb_pack (&submit_info->regs.cdm_resume_pds1,
-                 CR_CDM_CONTEXT_PDS1,
-                 state) {
+   pvr_csb_pack (stream_ptr, CR_CDM_CONTEXT_PDS1, state) {
       /* Convert the data size from dwords to bytes. */
       const uint32_t load_program_data_size =
          ctx_switch->sr[0].pds.load_program.data_size * 4U;
@@ -118,7 +78,7 @@ static void pvr_compute_job_ws_submit_info_init(
       state.unified_size = ctx_switch->sr[0].usc.unified_size;
       state.common_shared = true;
       state.common_size =
-         DIV_ROUND_UP(shared_regs << 2,
+         DIV_ROUND_UP(sub_cmd->num_shared_regs << 2,
                       PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE));
       state.temp_size = 0;
 
@@ -129,6 +89,99 @@ static void pvr_compute_job_ws_submit_info_init(
          load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE);
       state.fence = false;
    }
+   stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1);
+
+   if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) {
+      pvr_csb_pack (stream_ptr, CR_CDM_ITEM, value) {
+         value.mode = 0;
+      }
+      stream_ptr += pvr_cmd_length(CR_CDM_ITEM);
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
+      pvr_csb_pack (stream_ptr, CR_COMPUTE_CLUSTER, value) {
+         if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
+             dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
+            /* Each phantom has its own MCU, so atomicity can only be
+             * guaranteed when all work items are processed on the same
+             * phantom. This means we need to disable all USCs other than
+             * those of the first phantom, which has 4 clusters.
+             */
+            value.mask = 0xFU;
+         } else {
+            value.mask = 0U;
+         }
+      }
+      stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER);
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      pvr_finishme(
+         "Emit execute_count when feature gpu_multicore_support is present");
+      *stream_ptr = 0;
+      stream_ptr++;
+   }
+
+   submit_info->fw_stream_len = (uint8_t *)stream_ptr - submit_info->fw_stream;
+   assert(submit_info->fw_stream_len <= ARRAY_SIZE(submit_info->fw_stream));
+}
+/* Fill submit_info->fw_ext_stream for a compute job and set
+ * submit_info->fw_ext_stream_len to the number of valid bytes.
+ *
+ * The stream starts with one FW_STREAM_EXTHDR_COMPUTE0 header. When the
+ * device has quirk 49927, has_brn49927 is set in that header and a packed
+ * CR_TPU value (tag_cem_4k_face_packing enabled) is appended after it. If no
+ * bit inside FW_STREAM_EXTHDR_DATA_MASK ends up set in the header, the length
+ * is reported as 0.
+ */
+static void pvr_submit_info_ext_stream_init(
+   struct pvr_compute_ctx *ctx,
+   struct pvr_winsys_compute_submit_info *submit_info)
+{
+   const struct pvr_device_info *const dev_info =
+      &ctx->device->pdevice->dev_info;
+
+   uint32_t *ext_stream_ptr = (uint32_t *)submit_info->fw_ext_stream;
+   uint32_t *header0_ptr;
+   /* Remember where the header lives and step past it, so payload written
+    * below lands directly after the header.
+    */
+   header0_ptr = ext_stream_ptr;
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_COMPUTE0);
+
+   pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_COMPUTE0, header0) {
+      if (PVR_HAS_QUIRK(dev_info, 49927)) {
+         header0.has_brn49927 = true;
+
+         pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
+            value.tag_cem_4k_face_packing = true;
+         }
+         ext_stream_ptr += pvr_cmd_length(CR_TPU);
+      }
+   }
+   /* The length is the byte distance from the start of fw_ext_stream.
+    * NOTE(review): the assert below runs after the writes above, so it
+    * reports an overrun rather than preventing one.
+    */
+   submit_info->fw_ext_stream_len =
+      (uint8_t *)ext_stream_ptr - submit_info->fw_ext_stream;
+   assert(submit_info->fw_ext_stream_len <=
+          ARRAY_SIZE(submit_info->fw_ext_stream));
+   /* A header with no data bits set means nothing was appended, so report
+    * the extension stream as empty.
+    */
+   if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
+      submit_info->fw_ext_stream_len = 0;
+}
+
+static void pvr_compute_job_ws_submit_info_init(
+   struct pvr_compute_ctx *ctx,
+   struct pvr_sub_cmd_compute *sub_cmd,
+   struct vk_sync *barrier,
+   struct vk_sync **waits,
+   uint32_t wait_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_compute_submit_info *submit_info)
+{
+   const struct pvr_device *const device = ctx->device;
+   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
+
+   submit_info->frame_num = device->global_queue_present_count;
+   submit_info->job_num = device->global_queue_job_count;
+
+   submit_info->barrier = barrier;
+
+   submit_info->waits = waits;
+   submit_info->wait_count = wait_count;
+   submit_info->stage_flags = stage_flags;
+
+   pvr_submit_info_stream_init(ctx, sub_cmd, submit_info);
+   pvr_submit_info_ext_stream_init(ctx, submit_info);
 
    if (sub_cmd->uses_barrier)
       submit_info->flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
@@ -160,5 +213,6 @@ VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
 
    return device->ws->ops->compute_submit(ctx->ws_ctx,
                                           &submit_info,
+                                          &device->pdevice->dev_info,
                                           signal_sync);
 }
index 2f74144..03cb014 100644 (file)
@@ -1165,30 +1165,33 @@ void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *rt_dataset)
    vk_free(&device->vk.alloc, rt_dataset);
 }
 
-static void
-pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
-                                      struct pvr_render_job *job,
-                                      struct pvr_winsys_geometry_state *state)
+static void pvr_geom_state_stream_init(struct pvr_render_ctx *ctx,
+                                       struct pvr_render_job *job,
+                                       struct pvr_winsys_geometry_state *state)
 {
    const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
 
-   /* FIXME: Should this just be done unconditionally? The firmware will just
-    * ignore the value anyway.
-    */
-   if (PVR_HAS_QUIRK(dev_info, 56279)) {
-      pvr_csb_pack (&state->regs.pds_ctrl, CR_PDS_CTRL, value) {
-         value.max_num_vdm_tasks = rogue_get_max_num_vdm_pds_tasks(dev_info);
-      }
-   } else {
-      state->regs.pds_ctrl = 0;
+   uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
+
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_VDM_CTRL_STREAM_BASE, value) {
+      value.addr = job->ctrl_stream_addr;
    }
+   stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE);
 
-   pvr_csb_pack (&state->regs.ppp_ctrl, CR_PPP_CTRL, value) {
+   pvr_csb_pack ((uint64_t *)stream_ptr,
+                 CR_TPU_BORDER_COLOUR_TABLE_VDM,
+                 value) {
+      value.border_colour_table_address = job->border_colour_table_addr;
+   }
+   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM);
+
+   pvr_csb_pack (stream_ptr, CR_PPP_CTRL, value) {
       value.wclampen = true;
       value.fixed_point_format = 1;
    }
+   stream_ptr += pvr_cmd_length(CR_PPP_CTRL);
 
-   pvr_csb_pack (&state->regs.te_psg, CR_TE_PSG, value) {
+   pvr_csb_pack (stream_ptr, CR_TE_PSG, value) {
       value.completeonterminate = job->geometry_terminate;
 
       value.region_stride = job->rt_dataset->rgn_headers_stride /
@@ -1196,40 +1199,71 @@ pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
 
       value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942);
    }
-
-   /* The set up of CR_TPU must be identical to
-    * pvr_render_job_ws_fragment_state_init().
-    */
-   pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
-      value.tag_cem_4k_face_packing = true;
-   }
-
-   pvr_csb_pack (&state->regs.tpu_border_colour_table,
-                 CR_TPU_BORDER_COLOUR_TABLE_VDM,
-                 value) {
-      value.border_colour_table_address = job->border_colour_table_addr;
-   }
-
-   pvr_csb_pack (&state->regs.vdm_ctrl_stream_base,
-                 CR_VDM_CTRL_STREAM_BASE,
-                 value) {
-      value.addr = job->ctrl_stream_addr;
-   }
+   stream_ptr += pvr_cmd_length(CR_TE_PSG);
 
    /* Set up the USC common size for the context switch resume/load program
     * (ctx->ctx_switch.programs[i].sr->pds_load_program), which was created
     * as part of the render context.
     */
-   pvr_csb_pack (&state->regs.vdm_ctx_resume_task0_size,
-                 VDMCTRL_PDS_STATE0,
-                 value) {
+   pvr_csb_pack (stream_ptr, VDMCTRL_PDS_STATE0, value) {
       /* Calculate the size in bytes. */
       const uint16_t shared_registers_size = job->max_shared_registers * 4;
 
       value.usc_common_size =
          DIV_ROUND_UP(shared_registers_size,
                       PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
-   };
+   }
+   stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);
+
+   /* Set up view_idx to 0 */
+   *stream_ptr = 0;
+   stream_ptr++;
+
+   state->fw_stream_len = (uint8_t *)stream_ptr - state->fw_stream;
+   assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
+}
+/* Fill state->fw_ext_stream for the geometry phase of a render job and set
+ * state->fw_ext_stream_len to the number of valid bytes.
+ *
+ * The stream starts with one FW_STREAM_EXTHDR_GEOM0 header. When the device
+ * has quirk 49927, has_brn49927 is set in that header and a packed CR_TPU
+ * value (tag_cem_4k_face_packing enabled) is appended after it. If no bit
+ * inside FW_STREAM_EXTHDR_DATA_MASK ends up set in the header, the length is
+ * reported as 0.
+ *
+ * The job parameter is not read by this function.
+ */
+static void
+pvr_geom_state_stream_ext_init(struct pvr_render_ctx *ctx,
+                               struct pvr_render_job *job,
+                               struct pvr_winsys_geometry_state *state)
+{
+   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+
+   uint32_t *ext_stream_ptr = (uint32_t *)state->fw_ext_stream;
+   uint32_t *header0_ptr;
+   /* Remember where the header lives and step past it, so payload written
+    * below lands directly after the header.
+    */
+   header0_ptr = ext_stream_ptr;
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0);
+
+   pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_GEOM0, header0) {
+      if (PVR_HAS_QUIRK(dev_info, 49927)) {
+         header0.has_brn49927 = true;
+
+         /* The set up of CR_TPU must be identical to
+          * pvr_render_job_ws_fragment_state_stream_ext_init().
+          *
+          * NOTE(review): no function with that exact name is visible in the
+          * part of the patch reviewed here; going by the naming of the
+          * neighbouring helpers it is presumably
+          * pvr_frag_state_stream_ext_init() - confirm and update the name.
+          */
+         pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
+            value.tag_cem_4k_face_packing = true;
+         }
+         ext_stream_ptr += pvr_cmd_length(CR_TPU);
+      }
+   }
+   /* The length is the byte distance from the start of fw_ext_stream.
+    * NOTE(review): the assert below runs after the writes above, so it
+    * reports an overrun rather than preventing one.
+    */
+   state->fw_ext_stream_len = (uint8_t *)ext_stream_ptr - state->fw_ext_stream;
+   assert(state->fw_ext_stream_len <= ARRAY_SIZE(state->fw_ext_stream));
+   /* A header with no data bits set means nothing was appended, so report
+    * the extension stream as empty.
+    */
+   if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
+      state->fw_ext_stream_len = 0;
+}
+
+static void
+pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
+                                      struct pvr_render_job *job,
+                                      struct pvr_winsys_geometry_state *state)
+{
+   pvr_geom_state_stream_init(ctx, job, state);
+   pvr_geom_state_stream_ext_init(ctx, job, state);
 
    state->flags = 0;
 
@@ -1295,19 +1329,20 @@ pvr_get_isp_num_tiles_xy(const struct pvr_device_info *dev_info,
    }
 }
 
-static void
-pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
-                                      struct pvr_render_job *job,
-                                      struct pvr_winsys_fragment_state *state)
+static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
+                                       struct pvr_render_job *job,
+                                       struct pvr_winsys_fragment_state *state)
 {
+   const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
    const enum PVRX(CR_ISP_AA_MODE_TYPE)
       isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples);
-   const struct pvr_device_runtime_info *dev_runtime_info =
-      &ctx->device->pdevice->dev_runtime_info;
-   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
-   uint32_t isp_ctl;
 
-   /* FIXME: what to do when job->run_frag is false? */
+   uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
+   uint32_t pixel_ctl;
+   uint32_t isp_ctl;
 
    /* FIXME: pass in the number of samples rather than isp_aa_mode? */
    pvr_setup_tiles_in_flight(dev_info,
@@ -1317,72 +1352,27 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
                              false,
                              job->max_tiles_in_flight,
                              &isp_ctl,
-                             &state->regs.usc_pixel_output_ctrl);
+                             &pixel_ctl);
 
-   pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, value) {
-      value.sample_pos = true;
-
-      /* FIXME: There are a number of things that cause this to be set, this
-       * is just one of them.
-       */
-      value.process_empty_tiles = job->process_empty_tiles;
-   }
-
-   /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
-    * possible to fully pack CR_ISP_CTL above rather than having to OR in part
-    * of the value.
-    */
-   state->regs.isp_ctl |= isp_ctl;
-
-   pvr_csb_pack (&state->regs.isp_aa, CR_ISP_AA, value) {
-      value.mode = isp_aa_mode;
-   }
-
-   /* The set up of CR_TPU must be identical to
-    * pvr_render_job_ws_geometry_state_init().
-    */
-   pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
-      value.tag_cem_4k_face_packing = true;
-   }
-
-   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
-       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
-       dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
-      /* Each phantom has its own MCU, so atomicity can only be guaranteed
-       * when all work items are processed on the same phantom. This means we
-       * need to disable all USCs other than those of the first phantom, which
-       * has 4 clusters. Note that we only need to do this for atomic
-       * operations in fragment shaders, since hardware prevents the TA to run
-       * on more than one phantom anyway.
-       */
-      state->regs.pixel_phantom = 0xF;
-   } else {
-      state->regs.pixel_phantom = 0;
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_SCISSOR_BASE, value) {
+      value.addr = job->scissor_table_addr;
    }
+   stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
 
-   pvr_csb_pack (&state->regs.isp_bgobjvals, CR_ISP_BGOBJVALS, value) {
-      value.enablebgtag = job->enable_bg_tag;
-
-      value.mask = true;
-
-      /* FIXME: Hard code this for now as we don't currently support any
-       * stencil image formats.
-       */
-      value.stencil = 0xFF;
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_DBIAS_BASE, value) {
+      value.addr = job->depth_bias_table_addr;
    }
+   stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE);
 
-   pvr_csb_pack (&state->regs.isp_bgobjdepth, CR_ISP_BGOBJDEPTH, value) {
-      /* FIXME: This is suitable for the single depth format the driver
-       * currently supports, but may need updating to handle other depth
-       * formats.
-       */
-      value.value = fui(job->depth_clear_value);
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_OCLQRY_BASE, value) {
+      value.addr = PVR_DEV_ADDR_INVALID;
    }
+   stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
 
    /* FIXME: Some additional set up needed to support depth and stencil
     * load/store operations.
     */
-   pvr_csb_pack (&state->regs.isp_zlsctl, CR_ISP_ZLSCTL, value) {
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
       uint32_t aligned_width =
          ALIGN_POT(job->depth_physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
       uint32_t aligned_height =
@@ -1410,47 +1400,96 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
       value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_F32Z);
       value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
    }
+   stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
 
-   if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
-      pvr_csb_pack (&state->regs.isp_zls_pixels, CR_ISP_ZLS_PIXELS, value) {
-         value.x = job->depth_stride - 1;
-         value.y = job->depth_height - 1;
-      }
-   } else {
-      state->regs.isp_zls_pixels = 0;
-   }
-
-   pvr_csb_pack (&state->regs.isp_zload_store_base, CR_ISP_ZLOAD_BASE, value) {
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLOAD_BASE, value) {
       value.addr = job->depth_addr;
    }
+   stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
 
-   pvr_csb_pack (&state->regs.isp_stencil_load_store_base,
-                 CR_ISP_STENCIL_LOAD_BASE,
-                 value) {
+   pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_STENCIL_LOAD_BASE, value) {
       value.addr = job->stencil_addr;
 
       /* FIXME: May need to set value.enable to true. */
    }
+   stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
+
+   *(uint64_t *)stream_ptr = 0;
+   stream_ptr += 2U;
+
+   STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words) == 8U);
+   STATIC_ASSERT(ARRAY_SIZE(job->pbe_reg_words[0]) == 3U);
+   STATIC_ASSERT(sizeof(job->pbe_reg_words[0][0]) == sizeof(uint64_t));
+   memcpy(stream_ptr, job->pbe_reg_words, sizeof(job->pbe_reg_words));
+   stream_ptr += 8U * 3U * 2U;
 
-   pvr_csb_pack (&state->regs.tpu_border_colour_table,
+   pvr_csb_pack ((uint64_t *)stream_ptr,
                  CR_TPU_BORDER_COLOUR_TABLE_PDM,
                  value) {
       value.border_colour_table_address = job->border_colour_table_addr;
    }
+   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
 
-   state->regs.isp_oclqry_base = 0;
+   STATIC_ASSERT(ARRAY_SIZE(job->pds_bgnd_reg_values) == 3U);
+   STATIC_ASSERT(sizeof(job->pds_bgnd_reg_values[0]) == sizeof(uint64_t));
+   memcpy(stream_ptr,
+          job->pds_bgnd_reg_values,
+          sizeof(job->pds_bgnd_reg_values));
+   stream_ptr += 3U * 2U;
 
-   pvr_csb_pack (&state->regs.isp_dbias_base, CR_ISP_DBIAS_BASE, value) {
-      value.addr = job->depth_bias_table_addr;
+   /* Set pds_pr_bgnd array to 0 */
+   memset(stream_ptr, 0, 3U * sizeof(uint64_t));
+   stream_ptr += 3U * 2U;
+
+   /* Set usc_clear_register array to 0 */
+   memset(stream_ptr, 0, 8U * sizeof(uint32_t));
+   stream_ptr += 8U;
+
+   *stream_ptr = pixel_ctl;
+   stream_ptr++;
+
+   pvr_csb_pack (stream_ptr, CR_ISP_BGOBJDEPTH, value) {
+      /* FIXME: This is suitable for the single depth format the driver
+       * currently supports, but may need updating to handle other depth
+       * formats.
+       */
+      value.value = fui(job->depth_clear_value);
    }
+   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
 
-   pvr_csb_pack (&state->regs.isp_scissor_base, CR_ISP_SCISSOR_BASE, value) {
-      value.addr = job->scissor_table_addr;
+   pvr_csb_pack (stream_ptr, CR_ISP_BGOBJVALS, value) {
+      value.enablebgtag = job->enable_bg_tag;
+
+      value.mask = true;
+
+      /* FIXME: Hard code this for now as we don't currently support any
+       * stencil image formats.
+       */
+      value.stencil = 0xFF;
    }
+   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
 
-   pvr_csb_pack (&state->regs.event_pixel_pds_info,
-                 CR_EVENT_PIXEL_PDS_INFO,
-                 value) {
+   pvr_csb_pack (stream_ptr, CR_ISP_AA, value) {
+      value.mode = isp_aa_mode;
+   }
+   stream_ptr += pvr_cmd_length(CR_ISP_AA);
+
+   pvr_csb_pack (stream_ptr, CR_ISP_CTL, value) {
+      value.sample_pos = true;
+
+      /* FIXME: There are a number of things that cause this to be set, this
+       * is just one of them.
+       */
+      value.process_empty_tiles = job->process_empty_tiles;
+   }
+   /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
+    * possible to fully pack CR_ISP_CTL above rather than having to OR in part
+    * of the value.
+    */
+   *stream_ptr |= isp_ctl;
+   stream_ptr += pvr_cmd_length(CR_ISP_CTL);
+
+   pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_INFO, value) {
       value.const_size =
          DIV_ROUND_UP(ctx->device->pixel_event_data_size_in_dwords,
                       PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE));
@@ -1459,32 +1498,114 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
          DIV_ROUND_UP(PVR_STATE_PBE_DWORDS,
                       PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE));
    }
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
+      uint32_t pixel_phantom = 0;
+
+      if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
+          dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
+         /* Each phantom has its own MCU, so atomicity can only be guaranteed
+          * when all work items are processed on the same phantom. This means
+          * we need to disable all USCs other than those of the first
+          * phantom, which has 4 clusters. Note that we only need to do this
+          * for atomic operations in fragment shaders, since hardware
+          * prevents the TA from running on more than one phantom anyway.
+          */
+         pixel_phantom = 0xF;
+      }
 
-   pvr_csb_pack (&state->regs.event_pixel_pds_data,
-                 CR_EVENT_PIXEL_PDS_DATA,
-                 value) {
+      *stream_ptr = pixel_phantom;
+      stream_ptr++;
+   }
+
+   /* Set up view_idx to 0 */
+   *stream_ptr = 0;
+   stream_ptr++;
+
+   pvr_csb_pack (stream_ptr, CR_EVENT_PIXEL_PDS_DATA, value) {
       value.addr = PVR_DEV_ADDR(job->pds_pixel_event_data_offset);
    }
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
 
-   STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word) ==
-                 ARRAY_SIZE(job->pbe_reg_words));
-   STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word[0]) ==
-                 ARRAY_SIZE(job->pbe_reg_words[0]));
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      pvr_finishme(
+         "Emit isp_oclqry_stride when feature gpu_multicore_support is present");
+      *stream_ptr = 0;
+      stream_ptr++;
+   }
 
-   for (uint32_t i = 0; i < ARRAY_SIZE(job->pbe_reg_words); i++) {
-      state->regs.pbe_word[i][0] = job->pbe_reg_words[i][0];
-      state->regs.pbe_word[i][1] = job->pbe_reg_words[i][1];
-      state->regs.pbe_word[i][2] = job->pbe_reg_words[i][2];
+   if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
+      pvr_csb_pack (stream_ptr, CR_ISP_ZLS_PIXELS, value) {
+         value.x = job->depth_stride - 1;
+         value.y = job->depth_height - 1;
+      }
+      stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
    }
 
-   STATIC_ASSERT(__same_type(state->regs.pds_bgnd, job->pds_bgnd_reg_values));
-   typed_memcpy(state->regs.pds_bgnd,
-                job->pds_bgnd_reg_values,
-                ARRAY_SIZE(state->regs.pds_bgnd));
+   /* zls_stride */
+   *stream_ptr = job->depth_layer_size;
+   stream_ptr++;
+
+   /* sls_stride */
+   *stream_ptr = job->depth_layer_size;
+   stream_ptr++;
 
-   memset(state->regs.pds_pr_bgnd, 0, sizeof(state->regs.pds_pr_bgnd));
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      pvr_finishme(
+         "Emit execute_count when feature gpu_multicore_support is present");
+      *stream_ptr = 0;
+      stream_ptr++;
+   }
+
+   state->fw_stream_len = (uint8_t *)stream_ptr - state->fw_stream;
+   assert(state->fw_stream_len <= ARRAY_SIZE(state->fw_stream));
+}
+
+static void
+pvr_frag_state_stream_ext_init(struct pvr_render_ctx *ctx,
+                               struct pvr_render_job *job,
+                               struct pvr_winsys_fragment_state *state)
+{
+   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+
+   uint32_t *ext_stream_ptr = (uint32_t *)state->fw_ext_stream;
+   uint32_t *header0_ptr;
+
+   header0_ptr = ext_stream_ptr;
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_FRAG0);
+
+   pvr_csb_pack (header0_ptr, FW_STREAM_EXTHDR_FRAG0, header0) {
+      if (PVR_HAS_QUIRK(dev_info, 49927)) {
+         header0.has_brn49927 = true;
+
+         /* The setup of CR_TPU must be identical to
+          * pvr_render_job_ws_geometry_state_stream_ext_init().
+          */
+         pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
+            value.tag_cem_4k_face_packing = true;
+         }
+         ext_stream_ptr += pvr_cmd_length(CR_TPU);
+      }
+   }
+
+   state->fw_ext_stream_len = (uint8_t *)ext_stream_ptr - state->fw_ext_stream;
+   assert(state->fw_ext_stream_len <= ARRAY_SIZE(state->fw_ext_stream));
+
+   if ((*header0_ptr & PVRX(FW_STREAM_EXTHDR_DATA_MASK)) == 0)
+      state->fw_ext_stream_len = 0;
+}
+
+static void
+pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
+                                      struct pvr_render_job *job,
+                                      struct pvr_winsys_fragment_state *state)
+{
+   /* FIXME: what to do when job->run_frag is false? */
+
+   pvr_frag_state_stream_init(ctx, job, state);
+   pvr_frag_state_stream_ext_init(ctx, job, state);
 
-   /* FIXME: Merge geometry and fragment flags into a single flags member? */
    /* FIXME: move to its own function? */
    state->flags = 0;
 
@@ -1499,9 +1620,6 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
 
    if (job->frag_uses_atomic_ops)
       state->flags |= PVR_WINSYS_FRAG_FLAG_SINGLE_CORE;
-
-   state->zls_stride = job->depth_layer_size;
-   state->sls_stride = job->depth_layer_size;
 }
 
 static void pvr_render_job_ws_submit_info_init(
@@ -1533,9 +1651,6 @@ static void pvr_render_job_ws_submit_info_init(
 
    pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry);
    pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment);
-
-   /* These values are expected to match. */
-   assert(submit_info->geometry.regs.tpu == submit_info->fragment.regs.tpu);
 }
 
 VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
@@ -1564,6 +1679,7 @@ VkResult pvr_render_job_submit(struct pvr_render_ctx *ctx,
 
    result = device->ws->ops->render_submit(ctx->ws_ctx,
                                            &submit_info,
+                                           &device->pdevice->dev_info,
                                            signal_sync_geom,
                                            signal_sync_frag);
    if (result != VK_SUCCESS)
index c888725..3013b21 100644 (file)
@@ -309,15 +309,17 @@ struct pvr_winsys_compute_submit_info {
    uint32_t wait_count;
    uint32_t *stage_flags;
 
-   struct {
-      uint64_t tpu_border_colour_table;
-      uint64_t cdm_ctrl_stream_base;
-      uint64_t cdm_ctx_state_base_addr;
-      uint32_t tpu;
-      uint32_t cdm_resume_pds1;
-      uint32_t cdm_item;
-      uint32_t compute_cluster;
-   } regs;
+   /* Firmware stream buffer. This is the maximum possible size taking into
+    * consideration all HW features.
+    */
+   uint8_t fw_stream[92];
+   uint32_t fw_stream_len;
+
+   /* Firmware extension stream buffer. This is the maximum possible size taking
+    * into consideration all quirks and enhancements.
+    */
+   uint8_t fw_ext_stream[8];
+   uint32_t fw_ext_stream_len;
 
    /* Must be 0 or a combination of PVR_WINSYS_COMPUTE_FLAG_* flags. */
    uint32_t flags;
@@ -351,49 +353,37 @@ struct pvr_winsys_render_submit_info {
    uint32_t *stage_flags;
 
    struct pvr_winsys_geometry_state {
-      struct {
-         uint64_t pds_ctrl;
-         uint32_t ppp_ctrl;
-         uint32_t te_psg;
-         uint32_t tpu;
-         uint64_t tpu_border_colour_table;
-         uint64_t vdm_ctrl_stream_base;
-         uint32_t vdm_ctx_resume_task0_size;
-      } regs;
+      /* Firmware stream buffer. This is the maximum possible size taking into
+       * consideration all HW features.
+       */
+      uint8_t fw_stream[52];
+      uint32_t fw_stream_len;
+
+      /* Firmware extension stream buffer. This is the maximum possible size
+       * taking into consideration all quirks and enhancements.
+       */
+      uint8_t fw_ext_stream[12];
+      uint32_t fw_ext_stream_len;
 
       /* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */
       uint32_t flags;
    } geometry;
 
    struct pvr_winsys_fragment_state {
-      struct {
-         uint32_t event_pixel_pds_data;
-         uint32_t event_pixel_pds_info;
-         uint32_t isp_aa;
-         uint32_t isp_bgobjdepth;
-         uint32_t isp_bgobjvals;
-         uint32_t isp_ctl;
-         uint64_t isp_dbias_base;
-         uint64_t isp_oclqry_base;
-         uint64_t isp_scissor_base;
-         uint64_t isp_stencil_load_store_base;
-         uint64_t isp_zload_store_base;
-         uint64_t isp_zlsctl;
-         uint32_t isp_zls_pixels;
-         uint64_t pbe_word[PVR_MAX_COLOR_ATTACHMENTS]
-                          [ROGUE_NUM_PBESTATE_REG_WORDS];
-         uint32_t pixel_phantom;
-         uint64_t pds_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
-         uint64_t pds_pr_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
-         uint32_t tpu;
-         uint64_t tpu_border_colour_table;
-         uint32_t usc_pixel_output_ctrl;
-      } regs;
+      /* Firmware stream buffer. This is the maximum possible size taking into
+       * consideration all HW features.
+       */
+      uint8_t fw_stream[432];
+      uint32_t fw_stream_len;
+
+      /* Firmware extension stream buffer. This is the maximum possible size
+       * taking into consideration all quirks and enhancements.
+       */
+      uint8_t fw_ext_stream[8];
+      uint32_t fw_ext_stream_len;
 
       /* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. */
       uint32_t flags;
-      uint32_t zls_stride;
-      uint32_t sls_stride;
    } fragment;
 };
 
@@ -458,6 +448,7 @@ struct pvr_winsys_ops {
    VkResult (*render_submit)(
       const struct pvr_winsys_render_ctx *ctx,
       const struct pvr_winsys_render_submit_info *submit_info,
+      const struct pvr_device_info *dev_info,
       struct vk_sync *signal_sync_geom,
       struct vk_sync *signal_sync_frag);
 
@@ -469,6 +460,7 @@ struct pvr_winsys_ops {
    VkResult (*compute_submit)(
       const struct pvr_winsys_compute_ctx *ctx,
       const struct pvr_winsys_compute_submit_info *submit_info,
+      const struct pvr_device_info *dev_info,
       struct vk_sync *signal_sync);
 
    VkResult (*transfer_ctx_create)(
index 983a33f..9774ae7 100644 (file)
@@ -168,7 +168,7 @@ struct rogue_fwif_ta_regs {
    /* Only used when feature VDM_OBJECT_LEVEL_LLS present. */
    uint32_t vdm_context_resume_task3_size;
 
-   /* Only used when BRN 56279 or BRN 67381 present. */
+   /* Only used when BRN 67381 present. */
    uint32_t pds_ctrl;
 
    uint32_t view_idx;
@@ -208,7 +208,7 @@ struct rogue_fwif_cmd_ta {
     */
    struct rogue_fwif_cmd_ta_3d_shared cmd_shared;
 
-   struct rogue_fwif_ta_regs ALIGN_ATTR(8) geom_regs;
+   struct rogue_fwif_ta_regs ALIGN_ATTR(8) regs;
    uint32_t ALIGN_ATTR(8) flags;
    /**
     * Holds the TA/3D fence value to allow the 3D partial render command
index 03faa25..5208cd8 100644 (file)
@@ -30,6 +30,7 @@
 
 #include "fw-api/pvr_rogue_fwif.h"
 #include "fw-api/pvr_rogue_fwif_rf.h"
+#include "pvr_device_info.h"
 #include "pvr_private.h"
 #include "pvr_srv.h"
 #include "pvr_srv_bridge.h"
@@ -136,24 +137,86 @@ void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx)
    vk_free(srv_ws->alloc, srv_ctx);
 }
 
+static void
+pvr_srv_compute_cmd_stream_load(struct rogue_fwif_cmd_compute *const cmd,
+                                const uint8_t *const stream,
+                                const uint32_t stream_len,
+                                const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *stream_ptr = (const uint32_t *)stream;
+   struct rogue_fwif_cdm_regs *const regs = &cmd->regs;
+
+   regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM);
+
+   regs->cdm_ctrl_stream_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE);
+
+   regs->cdm_context_state_base_addr = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE);
+
+   regs->cdm_resume_pds1 = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1);
+
+   regs->cdm_item = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_CDM_ITEM);
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
+      regs->compute_cluster = *stream_ptr;
+      stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER);
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      cmd->execute_count = *stream_ptr;
+      stream_ptr++;
+   }
+
+   assert((const uint8_t *)stream_ptr - stream == stream_len);
+}
+
+static void pvr_srv_compute_cmd_ext_stream_load(
+   struct rogue_fwif_cmd_compute *const cmd,
+   const uint8_t *const ext_stream,
+   const uint32_t ext_stream_len,
+   const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
+   struct rogue_fwif_cdm_regs *const regs = &cmd->regs;
+
+   struct PVRX(FW_STREAM_EXTHDR_COMPUTE0) header0;
+
+   header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_COMPUTE0);
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_COMPUTE0);
+
+   assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
+   if (header0.has_brn49927) {
+      regs->tpu = *ext_stream_ptr;
+      ext_stream_ptr += pvr_cmd_length(CR_TPU);
+   }
+
+   assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
+}
+
 static void pvr_srv_compute_cmd_init(
    const struct pvr_winsys_compute_submit_info *submit_info,
-   struct rogue_fwif_cmd_compute *cmd)
+   struct rogue_fwif_cmd_compute *cmd,
+   const struct pvr_device_info *const dev_info)
 {
-   struct rogue_fwif_cdm_regs *fw_regs = &cmd->regs;
-
    memset(cmd, 0, sizeof(*cmd));
 
    cmd->cmn.frame_num = submit_info->frame_num;
 
-   fw_regs->tpu_border_colour_table = submit_info->regs.tpu_border_colour_table;
-   fw_regs->cdm_item = submit_info->regs.cdm_item;
-   fw_regs->compute_cluster = submit_info->regs.compute_cluster;
-   fw_regs->cdm_ctrl_stream_base = submit_info->regs.cdm_ctrl_stream_base;
-   fw_regs->cdm_context_state_base_addr =
-      submit_info->regs.cdm_ctx_state_base_addr;
-   fw_regs->tpu = submit_info->regs.tpu;
-   fw_regs->cdm_resume_pds1 = submit_info->regs.cdm_resume_pds1;
+   pvr_srv_compute_cmd_stream_load(cmd,
+                                   submit_info->fw_stream,
+                                   submit_info->fw_stream_len,
+                                   dev_info);
+
+   if (submit_info->fw_ext_stream_len) {
+      pvr_srv_compute_cmd_ext_stream_load(cmd,
+                                          submit_info->fw_ext_stream,
+                                          submit_info->fw_ext_stream_len,
+                                          dev_info);
+   }
 
    if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP)
       cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
@@ -165,6 +228,7 @@ static void pvr_srv_compute_cmd_init(
 VkResult pvr_srv_winsys_compute_submit(
    const struct pvr_winsys_compute_ctx *ctx,
    const struct pvr_winsys_compute_submit_info *submit_info,
+   const struct pvr_device_info *const dev_info,
    struct vk_sync *signal_sync)
 {
    const struct pvr_srv_winsys_compute_ctx *srv_ctx =
@@ -176,7 +240,7 @@ VkResult pvr_srv_winsys_compute_submit(
    int in_fd = -1;
    int fence;
 
-   pvr_srv_compute_cmd_init(submit_info, &compute_cmd);
+   pvr_srv_compute_cmd_init(submit_info, &compute_cmd, dev_info);
 
    for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
       struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);
index 7793b75..a5a5075 100644 (file)
@@ -26,6 +26,7 @@
 
 #include <vulkan/vulkan.h>
 
+struct pvr_device_info;
 struct pvr_winsys;
 struct pvr_winsys_compute_ctx;
 struct pvr_winsys_compute_ctx_create_info;
@@ -45,6 +46,7 @@ void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx);
 VkResult pvr_srv_winsys_compute_submit(
    const struct pvr_winsys_compute_ctx *ctx,
    const struct pvr_winsys_compute_submit_info *submit_info,
+   const struct pvr_device_info *dev_info,
    struct vk_sync *signal_sync);
 
 #endif /* PVR_SRV_JOB_COMPUTE_H */
index 8080bee..d6d1510 100644 (file)
@@ -402,28 +402,82 @@ void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx)
    vk_free(srv_ws->alloc, srv_ctx);
 }
 
+static void
+pvr_srv_geometry_cmd_stream_load(struct rogue_fwif_cmd_ta *const cmd,
+                                 const uint8_t *const stream,
+                                 const uint32_t stream_len,
+                                 const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *stream_ptr = (const uint32_t *)stream;
+   struct rogue_fwif_ta_regs *const regs = &cmd->regs;
+
+   regs->vdm_ctrl_stream_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_VDM_CTRL_STREAM_BASE);
+
+   regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_VDM);
+
+   regs->ppp_ctrl = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_PPP_CTRL);
+
+   regs->te_psg = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_TE_PSG);
+
+   regs->vdm_context_resume_task0_size = *stream_ptr;
+   stream_ptr += pvr_cmd_length(VDMCTRL_PDS_STATE0);
+
+   regs->view_idx = *stream_ptr;
+   stream_ptr++;
+
+   assert((const uint8_t *)stream_ptr - stream == stream_len);
+}
+
+static void pvr_srv_geometry_cmd_ext_stream_load(
+   struct rogue_fwif_cmd_ta *const cmd,
+   const uint8_t *const ext_stream,
+   const uint32_t ext_stream_len,
+   const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
+   struct rogue_fwif_ta_regs *const regs = &cmd->regs;
+
+   struct PVRX(FW_STREAM_EXTHDR_GEOM0) header0;
+
+   header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_GEOM0);
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_GEOM0);
+
+   assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
+   if (header0.has_brn49927) {
+      regs->tpu = *ext_stream_ptr;
+      ext_stream_ptr += pvr_cmd_length(CR_TPU);
+   }
+
+   assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
+}
+
 static void pvr_srv_geometry_cmd_init(
    const struct pvr_winsys_render_submit_info *submit_info,
    const struct pvr_srv_sync_prim *sync_prim,
-   struct rogue_fwif_cmd_ta *cmd)
+   struct rogue_fwif_cmd_ta *cmd,
+   const struct pvr_device_info *const dev_info)
 {
    const struct pvr_winsys_geometry_state *state = &submit_info->geometry;
-   struct rogue_fwif_ta_regs *fw_regs = &cmd->geom_regs;
 
    memset(cmd, 0, sizeof(*cmd));
 
    cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
 
-   fw_regs->vdm_ctrl_stream_base = state->regs.vdm_ctrl_stream_base;
-   fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
-   fw_regs->ppp_ctrl = state->regs.ppp_ctrl;
-   fw_regs->te_psg = state->regs.te_psg;
-   fw_regs->tpu = state->regs.tpu;
-   fw_regs->vdm_context_resume_task0_size =
-      state->regs.vdm_ctx_resume_task0_size;
+   pvr_srv_geometry_cmd_stream_load(cmd,
+                                    state->fw_stream,
+                                    state->fw_stream_len,
+                                    dev_info);
 
-   assert(state->regs.pds_ctrl >> 32U == 0U);
-   fw_regs->pds_ctrl = (uint32_t)state->regs.pds_ctrl;
+   if (state->fw_ext_stream_len) {
+      pvr_srv_geometry_cmd_ext_stream_load(cmd,
+                                           state->fw_ext_stream,
+                                           state->fw_ext_stream_len,
+                                           dev_info);
+   }
 
    if (state->flags & PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY)
       cmd->flags |= ROGUE_FWIF_TAFLAGS_FIRSTKICK;
@@ -439,73 +493,160 @@ static void pvr_srv_geometry_cmd_init(
    cmd->partial_render_ta_3d_fence.value = sync_prim->value;
 }
 
+static void
+pvr_srv_fragment_cmd_stream_load(struct rogue_fwif_cmd_3d *const cmd,
+                                 const uint8_t *const stream,
+                                 const uint32_t stream_len,
+                                 const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *stream_ptr = (const uint32_t *)stream;
+   struct rogue_fwif_3d_regs *const regs = &cmd->regs;
+
+   regs->isp_scissor_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_SCISSOR_BASE);
+
+   regs->isp_dbias_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_DBIAS_BASE);
+
+   regs->isp_oclqry_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
+
+   regs->isp_zlsctl = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
+
+   regs->isp_zload_store_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_ZLOAD_BASE);
+
+   regs->isp_stencil_load_store_base = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_STENCIL_LOAD_BASE);
+
+   regs->fb_cdc_zls = *(const uint64_t *)stream_ptr;
+   stream_ptr += 2U;
+
+   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word) == 8U);
+   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_word[0]) == 3U);
+   STATIC_ASSERT(sizeof(regs->pbe_word[0][0]) == sizeof(uint64_t));
+   memcpy(regs->pbe_word, stream_ptr, sizeof(regs->pbe_word));
+   stream_ptr += 8U * 3U * 2U;
+
+   regs->tpu_border_colour_table = *(const uint64_t *)stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_PDM);
+
+   STATIC_ASSERT(ARRAY_SIZE(regs->pds_bgnd) == 3U);
+   STATIC_ASSERT(sizeof(regs->pds_bgnd[0]) == sizeof(uint64_t));
+   memcpy(regs->pds_bgnd, stream_ptr, sizeof(regs->pds_bgnd));
+   stream_ptr += 3U * 2U;
+
+   STATIC_ASSERT(ARRAY_SIZE(regs->pds_pr_bgnd) == 3U);
+   STATIC_ASSERT(sizeof(regs->pds_pr_bgnd[0]) == sizeof(uint64_t));
+   memcpy(regs->pds_pr_bgnd, stream_ptr, sizeof(regs->pds_pr_bgnd));
+   stream_ptr += 3U * 2U;
+
+   STATIC_ASSERT(ARRAY_SIZE(regs->usc_clear_register) == 8U);
+   STATIC_ASSERT(sizeof(regs->usc_clear_register[0]) == sizeof(uint32_t));
+   memcpy(regs->usc_clear_register,
+          stream_ptr,
+          sizeof(regs->usc_clear_register));
+   stream_ptr += 8U;
+
+   regs->usc_pixel_output_ctrl = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);
+
+   regs->isp_bgobjdepth = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJDEPTH);
+
+   regs->isp_bgobjvals = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);
+
+   regs->isp_aa = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_AA);
+
+   regs->isp_ctl = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_ISP_CTL);
+
+   regs->event_pixel_pds_info = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
+      regs->pixel_phantom = *stream_ptr;
+      stream_ptr++;
+   }
+
+   regs->view_idx = *stream_ptr;
+   stream_ptr++;
+
+   regs->event_pixel_pds_data = *stream_ptr;
+   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);
+
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      regs->isp_oclqry_stride = *stream_ptr;
+      stream_ptr++;
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
+      regs->isp_zls_pixels = *stream_ptr;
+      stream_ptr += pvr_cmd_length(CR_ISP_ZLS_PIXELS);
+   }
+
+   cmd->zls_stride = *stream_ptr;
+   stream_ptr++;
+
+   cmd->sls_stride = *stream_ptr;
+   stream_ptr++;
+
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      cmd->execute_count = *stream_ptr;
+      stream_ptr++;
+   }
+
+   assert((const uint8_t *)stream_ptr - stream == stream_len);
+}
+
+static void pvr_srv_fragment_cmd_ext_stream_load(
+   struct rogue_fwif_cmd_3d *const cmd,
+   const uint8_t *const ext_stream,
+   const uint32_t ext_stream_len,
+   const struct pvr_device_info *const dev_info)
+{
+   const uint32_t *ext_stream_ptr = (const uint32_t *)ext_stream;
+   struct rogue_fwif_3d_regs *const regs = &cmd->regs;
+
+   struct PVRX(FW_STREAM_EXTHDR_FRAG0) header0;
+
+   header0 = pvr_csb_unpack(ext_stream_ptr, FW_STREAM_EXTHDR_FRAG0);
+   ext_stream_ptr += pvr_cmd_length(FW_STREAM_EXTHDR_FRAG0);
+
+   assert(PVR_HAS_QUIRK(dev_info, 49927) == header0.has_brn49927);
+   if (header0.has_brn49927) {
+      regs->tpu = *ext_stream_ptr;
+      ext_stream_ptr += pvr_cmd_length(CR_TPU);
+   }
+
+   assert((const uint8_t *)ext_stream_ptr - ext_stream == ext_stream_len);
+}
+
 static void pvr_srv_fragment_cmd_init(
    const struct pvr_winsys_render_submit_info *submit_info,
-   struct rogue_fwif_cmd_3d *cmd)
+   struct rogue_fwif_cmd_3d *cmd,
+   const struct pvr_device_info *dev_info)
 {
    const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
-   struct rogue_fwif_3d_regs *fw_regs = &cmd->regs;
 
    memset(cmd, 0, sizeof(*cmd));
 
    cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
 
-   fw_regs->usc_pixel_output_ctrl = state->regs.usc_pixel_output_ctrl;
-   fw_regs->isp_bgobjdepth = state->regs.isp_bgobjdepth;
-   fw_regs->isp_bgobjvals = state->regs.isp_bgobjvals;
-   fw_regs->isp_aa = state->regs.isp_aa;
-   fw_regs->isp_ctl = state->regs.isp_ctl;
-   fw_regs->tpu = state->regs.tpu;
-   fw_regs->event_pixel_pds_info = state->regs.event_pixel_pds_info;
-   fw_regs->pixel_phantom = state->regs.pixel_phantom;
-   fw_regs->event_pixel_pds_data = state->regs.event_pixel_pds_data;
-   fw_regs->isp_scissor_base = state->regs.isp_scissor_base;
-   fw_regs->isp_dbias_base = state->regs.isp_dbias_base;
-   fw_regs->isp_oclqry_base = state->regs.isp_oclqry_base;
-   fw_regs->isp_zlsctl = state->regs.isp_zlsctl;
-   fw_regs->isp_zload_store_base = state->regs.isp_zload_store_base;
-   fw_regs->isp_stencil_load_store_base =
-      state->regs.isp_stencil_load_store_base;
-   fw_regs->isp_zls_pixels = state->regs.isp_zls_pixels;
-
-   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word) ==
-                 ARRAY_SIZE(state->regs.pbe_word));
-
-   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word[0]) <=
-                 ARRAY_SIZE(state->regs.pbe_word[0]));
-
-#if !defined(NDEBUG)
-   /* Depending on the hardware we might have more PBE words than the firmware
-    * accepts so check that the extra words are 0.
-    */
-   if (ARRAY_SIZE(fw_regs->pbe_word[0]) < ARRAY_SIZE(state->regs.pbe_word[0])) {
-      /* For each color attachment. */
-      for (uint32_t i = 0; i < ARRAY_SIZE(state->regs.pbe_word); i++) {
-         /* For each extra PBE word not used by the firmware. */
-         for (uint32_t j = ARRAY_SIZE(fw_regs->pbe_word[0]);
-              j < ARRAY_SIZE(state->regs.pbe_word[0]);
-              j++) {
-            assert(state->regs.pbe_word[i][j] == 0);
-         }
-      }
-   }
-#endif
-
-   memcpy(fw_regs->pbe_word, state->regs.pbe_word, sizeof(fw_regs->pbe_word));
+   pvr_srv_fragment_cmd_stream_load(cmd,
+                                    state->fw_stream,
+                                    state->fw_stream_len,
+                                    dev_info);
 
-   fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
-
-   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_bgnd) ==
-                 ARRAY_SIZE(state->regs.pds_bgnd));
-   typed_memcpy(fw_regs->pds_bgnd,
-                state->regs.pds_bgnd,
-                ARRAY_SIZE(fw_regs->pds_bgnd));
-
-   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_pr_bgnd) ==
-                 ARRAY_SIZE(state->regs.pds_pr_bgnd));
-   typed_memcpy(fw_regs->pds_pr_bgnd,
-                state->regs.pds_pr_bgnd,
-                ARRAY_SIZE(fw_regs->pds_pr_bgnd));
+   if (state->fw_ext_stream_len) {
+      pvr_srv_fragment_cmd_ext_stream_load(cmd,
+                                           state->fw_ext_stream,
+                                           state->fw_ext_stream_len,
+                                           dev_info);
+   }
 
    if (state->flags & PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT)
       cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER;
@@ -518,14 +659,12 @@ static void pvr_srv_fragment_cmd_init(
 
    if (state->flags & PVR_WINSYS_FRAG_FLAG_SINGLE_CORE)
       cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE;
-
-   cmd->zls_stride = state->zls_stride;
-   cmd->sls_stride = state->sls_stride;
 }
 
 VkResult pvr_srv_winsys_render_submit(
    const struct pvr_winsys_render_ctx *ctx,
    const struct pvr_winsys_render_submit_info *submit_info,
+   const struct pvr_device_info *dev_info,
    struct vk_sync *signal_sync_geom,
    struct vk_sync *signal_sync_frag)
 {
@@ -552,8 +691,8 @@ VkResult pvr_srv_winsys_render_submit(
 
    VkResult result;
 
-   pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd);
-   pvr_srv_fragment_cmd_init(submit_info, &frag_cmd);
+   pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd, dev_info);
+   pvr_srv_fragment_cmd_init(submit_info, &frag_cmd, dev_info);
 
    for (uint32_t i = 0U; i < submit_info->wait_count; i++) {
       struct pvr_srv_sync *srv_wait_sync = to_srv_sync(submit_info->waits[i]);
index 3612e38..257a2ab 100644 (file)
@@ -68,6 +68,7 @@ void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx);
 VkResult pvr_srv_winsys_render_submit(
    const struct pvr_winsys_render_ctx *ctx,
    const struct pvr_winsys_render_submit_info *submit_info,
+   const struct pvr_device_info *dev_info,
    struct vk_sync *signal_sync_geom,
    struct vk_sync *signal_sync_frag);