pvr: Move BRN 44079, 48492 and 66011 code into pvrsrvkm specific directory
author Sarah Walker <sarah.walker@imgtec.com>
Mon, 4 Jul 2022 09:05:30 +0000 (10:05 +0100)
committer Marge Bot <emma+marge@anholt.net>
Wed, 27 Jul 2022 10:13:19 +0000 (10:13 +0000)
The new kernel mode driver will provide the relevant information directly to
userspace, so this code is only required for pvrsrvkm.

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17595>

src/imagination/common/pvr_device_info.h
src/imagination/include/hwdef/rogue_hw_utils.h
src/imagination/vulkan/pvr_cmd_buffer.c
src/imagination/vulkan/pvr_device.c
src/imagination/vulkan/pvr_job_common.c
src/imagination/vulkan/pvr_job_common.h
src/imagination/vulkan/pvr_job_context.c
src/imagination/vulkan/pvr_job_render.c
src/imagination/vulkan/pvr_private.h
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c

index a9098cb..339bb1f 100644 (file)
@@ -356,6 +356,16 @@ struct pvr_device_info {
    struct pvr_device_quirks quirks;
 };
 
+struct pvr_device_runtime_info {
+   uint64_t min_free_list_size;
+   uint64_t reserved_shared_size;
+   uint64_t total_reserved_partition_size;
+   uint64_t num_phantoms;
+   uint64_t max_coeffs;
+   uint64_t cdm_max_local_mem_size_regs;
+   uint32_t core_count;
+};
+
 /**
  * Packs B, V, N and C values into a 64-bit unsigned integer.
  *
index 56ea843..0679fe4 100644 (file)
@@ -172,15 +172,6 @@ rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info)
    return num_macrotiles_x * num_macrotiles_y * 8U;
 }
 
-/* To get the number of required Bernado/Phantom(s), divide the number of
- * clusters by 4 and round up.
- */
-static inline uint32_t
-rogue_get_num_phantoms(const struct pvr_device_info *dev_info)
-{
-   return DIV_ROUND_UP(PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U), 4U);
-}
-
 /* Region header size in bytes. */
 static inline uint32_t
 rogue_get_region_header_size(const struct pvr_device_info *dev_info)
@@ -198,24 +189,6 @@ rogue_get_region_header_size(const struct pvr_device_info *dev_info)
    return 5;
 }
 
-/* Return the total reserved size of partition in dwords. */
-static inline uint32_t
-rogue_get_total_reserved_partition_size(const struct pvr_device_info *dev_info)
-{
-   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
-   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
-   uint32_t max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
-
-   if (tile_size_x == 16 && tile_size_y == 16) {
-      return tile_size_x * tile_size_y * max_partitions *
-             PVR_GET_FEATURE_VALUE(dev_info,
-                                   usc_min_output_registers_per_pix,
-                                   0);
-   }
-
-   return max_partitions * 1024U;
-}
-
 static inline uint32_t
 rogue_get_render_size_max(const struct pvr_device_info *dev_info)
 {
@@ -252,26 +225,6 @@ static inline uint32_t pvr_get_max_user_vertex_output_components(
 }
 
 static inline uint32_t
-rogue_get_reserved_shared_size(const struct pvr_device_info *dev_info)
-{
-   uint32_t common_store_size_in_dwords =
-      PVR_GET_FEATURE_VALUE(dev_info,
-                            common_store_size_in_dwords,
-                            512U * 4U * 4U);
-   uint32_t reserved_shared_size =
-      common_store_size_in_dwords - (256U * 4U) -
-      rogue_get_total_reserved_partition_size(dev_info);
-
-   if (PVR_HAS_QUIRK(dev_info, 44079)) {
-      uint32_t common_store_split_point = (768U * 4U * 4U);
-
-      return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size);
-   }
-
-   return reserved_shared_size;
-}
-
-static inline uint32_t
 rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
 {
    if (PVR_HAS_FEATURE(dev_info, compute))
@@ -281,42 +234,6 @@ rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
 }
 
 static inline uint32_t
-rogue_get_max_coeffs(const struct pvr_device_info *dev_info)
-{
-   uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS;
-   uint32_t pending_allocation_shared_regs = 2U * 1024U;
-   uint32_t pending_allocation_coeff_regs = 0U;
-   uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
-   uint32_t tiles_in_flight =
-      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
-   uint32_t max_coeff_pixel_portion =
-      DIV_ROUND_UP(tiles_in_flight, num_phantoms);
-
-   max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS;
-
-   /* Compute tasks on cores with BRN48492 and without compute overlap may lock
-    * up without two additional lines of coeffs.
-    */
-   if (PVR_HAS_QUIRK(dev_info, 48492) &&
-       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
-      pending_allocation_coeff_regs = 2U * 1024U;
-   }
-
-   if (PVR_HAS_ERN(dev_info, 38748))
-      pending_allocation_shared_regs = 0U;
-
-   if (PVR_HAS_ERN(dev_info, 38020)) {
-      max_coeff_additional_portion +=
-         rogue_max_compute_shared_registers(dev_info);
-   }
-
-   return rogue_get_reserved_shared_size(dev_info) +
-          pending_allocation_coeff_regs -
-          (max_coeff_pixel_portion + max_coeff_additional_portion +
-           pending_allocation_shared_regs);
-}
-
-static inline uint32_t
 rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
 {
    if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
@@ -342,28 +259,6 @@ static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment(
 }
 
 static inline uint32_t
-rogue_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info)
-{
-   uint32_t available_coeffs_in_dwords = rogue_get_max_coeffs(dev_info);
-
-   if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) &&
-       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
-      /* Driver must not use the 2 reserved lines. */
-      available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2;
-   }
-
-   /* The maximum amount of local memory available to a kernel is the minimum
-    * of the total number of coefficient registers available and the max common
-    * store allocation size which can be made by the CDM.
-    *
-    * If any coeff lines are reserved for tessellation or pixel then we need to
-    * subtract those too.
-    */
-   return MIN2(available_coeffs_in_dwords,
-               ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS);
-}
-
-static inline uint32_t
 rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info)
 {
    /* The number of tasks which can be executed per USC - Limited to 16U by the
index a6edd6c..9f75c98 100644 (file)
@@ -1066,10 +1066,15 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
  */
 #define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U
 
-static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info,
-                                         struct pvr_cmd_buffer *cmd_buffer,
-                                         struct pvr_sub_cmd_compute *sub_cmd)
+static void
+pvr_sub_cmd_compute_job_init(const struct pvr_physical_device *pdevice,
+                             struct pvr_cmd_buffer *cmd_buffer,
+                             struct pvr_sub_cmd_compute *sub_cmd)
 {
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
+
    if (sub_cmd->uses_barrier)
       sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
 
@@ -1102,7 +1107,7 @@ static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info,
 
    if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
        PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
-       rogue_get_num_phantoms(dev_info) > 1 && sub_cmd->uses_atomic_ops) {
+       dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
       /* Each phantom has its own MCU, so atomicity can only be guaranteed
        * when all work items are processed on the same phantom. This means we
        * need to disable all USCs other than those of the first phantom, which
@@ -1131,14 +1136,17 @@ static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info,
    (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE))
 
 static uint32_t
-pvr_compute_flat_slot_size(const struct pvr_device_info *dev_info,
+pvr_compute_flat_slot_size(const struct pvr_physical_device *pdevice,
                            uint32_t coeff_regs_count,
                            bool use_barrier,
                            uint32_t total_workitems)
 {
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
    uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK;
    uint32_t max_avail_coeff_regs =
-      rogue_get_cdm_max_local_mem_size_regs(dev_info);
+      dev_runtime_info->cdm_max_local_mem_size_regs;
    uint32_t localstore_chunks_count =
       DIV_ROUND_UP(coeff_regs_count << 2,
                    PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
@@ -1309,8 +1317,7 @@ pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
 {
    const struct pvr_pds_upload *program =
       &cmd_buffer->device->pds_compute_fence_program;
-   const struct pvr_device_info *dev_info =
-      &cmd_buffer->device->pdevice->dev_info;
+   const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
    struct pvr_csb *csb = &sub_cmd->control_stream;
 
    struct pvr_compute_kernel_info info = {
@@ -1336,7 +1343,7 @@ pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
    /* Here we calculate the slot size. This can depend on the use of barriers,
     * local memory, BRN's or other factors.
     */
-   info.max_instances = pvr_compute_flat_slot_size(dev_info, 0U, false, 1U);
+   info.max_instances = pvr_compute_flat_slot_size(pdevice, 0U, false, 1U);
 
    pvr_compute_generate_control_stream(csb, &info);
 }
@@ -1413,7 +1420,7 @@ static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
          return result;
       }
 
-      pvr_sub_cmd_compute_job_init(&device->pdevice->dev_info,
+      pvr_sub_cmd_compute_job_init(device->pdevice,
                                    cmd_buffer,
                                    compute_sub_cmd);
       break;
@@ -2838,8 +2845,7 @@ static VkResult pvr_setup_descriptor_mappings(
 static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
                                       struct pvr_sub_cmd_compute *const sub_cmd)
 {
-   const struct pvr_device_info *dev_info =
-      &cmd_buffer->device->pdevice->dev_info;
+   const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
    struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
    struct pvr_csb *csb = &sub_cmd->control_stream;
    const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
@@ -2892,18 +2898,21 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
    /* We don't need to pad the workgroup size. */
 
    info.max_instances =
-      pvr_compute_flat_slot_size(dev_info, const_shared_reg_count, false, 1U);
+      pvr_compute_flat_slot_size(pdevice, const_shared_reg_count, false, 1U);
 
    pvr_compute_generate_control_stream(csb, &info);
 }
 
 static uint32_t
-pvr_compute_flat_pad_workgroup_size(const struct pvr_device_info *dev_info,
+pvr_compute_flat_pad_workgroup_size(const struct pvr_physical_device *pdevice,
                                     uint32_t workgroup_size,
                                     uint32_t coeff_regs_count)
 {
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
    uint32_t max_avail_coeff_regs =
-      rogue_get_cdm_max_local_mem_size_regs(dev_info);
+      dev_runtime_info->cdm_max_local_mem_size_regs;
    uint32_t coeff_regs_count_aligned =
       ALIGN_POT(coeff_regs_count,
                 PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE) >> 2U);
@@ -2934,8 +2943,9 @@ static void pvr_compute_update_kernel(
    struct pvr_sub_cmd_compute *const sub_cmd,
    const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS])
 {
-   const struct pvr_device_info *dev_info =
-      &cmd_buffer->device->pdevice->dev_info;
+   const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
    struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
    struct pvr_csb *csb = &sub_cmd->control_stream;
    const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
@@ -2976,7 +2986,7 @@ static void pvr_compute_update_kernel(
    if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) {
       /* Enforce a single workgroup per cluster through allocation starvation.
        */
-      coeff_regs = rogue_get_cdm_max_local_mem_size_regs(dev_info);
+      coeff_regs = dev_runtime_info->cdm_max_local_mem_size_regs;
    } else {
       coeff_regs = pipeline->state.shader.coefficient_register_count;
    }
@@ -2991,14 +3001,14 @@ static void pvr_compute_update_kernel(
    coeff_regs += pipeline->state.shader.const_shared_reg_count;
 
    work_size =
-      pvr_compute_flat_pad_workgroup_size(dev_info, work_size, coeff_regs);
+      pvr_compute_flat_pad_workgroup_size(pdevice, work_size, coeff_regs);
 
    info.local_size[0] = work_size;
    info.local_size[1] = 1U;
    info.local_size[2] = 1U;
 
    info.max_instances =
-      pvr_compute_flat_slot_size(dev_info, coeff_regs, false, work_size);
+      pvr_compute_flat_slot_size(pdevice, coeff_regs, false, work_size);
 
    pvr_compute_generate_control_stream(csb, &info);
 }
@@ -3632,8 +3642,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
       &state->gfx_pipeline->fragment_shader_state.pds_coeff_program;
    const struct pvr_pipeline_stage_state *fragment_state =
       &state->gfx_pipeline->fragment_shader_state.stage_state;
-   struct pvr_device_info *const dev_info =
-      &cmd_buffer->device->pdevice->dev_info;
+   const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
    struct pvr_emit_state *const emit_state = &state->emit_state;
    struct pvr_ppp_state *const ppp_state = &state->ppp_state;
 
@@ -3659,7 +3668,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
 
    const uint32_t max_tiles_in_flight =
       pvr_calc_fscommon_size_and_tiles_in_flight(
-         dev_info,
+         pdevice,
          usc_shared_size *
             PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE),
          1);
index d03373a..16407ff 100644 (file)
@@ -661,33 +661,35 @@ void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
 
 /* TODO: See if this function can be improved once fully implemented. */
 uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
-   const struct pvr_device_info *dev_info,
+   const struct pvr_physical_device *pdevice,
    uint32_t fs_common_size,
    uint32_t min_tiles_in_flight)
 {
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
    uint32_t max_tiles_in_flight;
    uint32_t num_allocs;
 
    if (PVR_HAS_FEATURE(dev_info, s8xe)) {
       num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
    } else {
-      uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
       uint32_t min_cluster_per_phantom = 0;
 
-      if (num_phantoms > 1) {
+      if (dev_runtime_info->num_phantoms > 1) {
          pvr_finishme("Unimplemented path!!");
       } else {
          min_cluster_per_phantom =
             PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
       }
 
-      if (num_phantoms > 1)
+      if (dev_runtime_info->num_phantoms > 1)
          pvr_finishme("Unimplemented path!!");
 
-      if (num_phantoms > 2)
+      if (dev_runtime_info->num_phantoms > 2)
          pvr_finishme("Unimplemented path!!");
 
-      if (num_phantoms > 3)
+      if (dev_runtime_info->num_phantoms > 3)
          pvr_finishme("Unimplemented path!!");
 
       if (min_cluster_per_phantom >= 4)
@@ -702,6 +704,8 @@ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
       PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
 
    if (fs_common_size == UINT_MAX) {
+      const struct pvr_device_runtime_info *dev_runtime_info =
+         &pdevice->dev_runtime_info;
       uint32_t max_common_size;
 
       num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);
@@ -711,8 +715,8 @@ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
          num_allocs += 1;
       }
 
-      max_common_size = rogue_get_reserved_shared_size(dev_info) -
-                        rogue_get_max_coeffs(dev_info);
+      max_common_size =
+         dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
 
       /* Double resource requirements to deal with fragmentation. */
       max_common_size /= num_allocs * 2;
@@ -764,7 +768,7 @@ pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice)
    };
 
    const uint32_t common_size =
-      pvr_calc_fscommon_size_and_tiles_in_flight(&pdevice->dev_info, -1, 1);
+      pvr_calc_fscommon_size_and_tiles_in_flight(pdevice, -1, 1);
    enum pvr_descriptor_cs_level cs_level;
 
    if (common_size >= 2048) {
index 24f18df..fb8f31b 100644 (file)
@@ -286,13 +286,15 @@ void pvr_pbe_pack_state(
  * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
  * pvr_render_job_ws_fragment_state_init().
  */
-void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
-                               uint32_t msaa_mode,
-                               uint32_t pixel_width,
-                               bool paired_tiles,
-                               uint32_t max_tiles_in_flight,
-                               uint32_t *const isp_ctl_out,
-                               uint32_t *const pixel_ctl_out)
+void pvr_setup_tiles_in_flight(
+   const struct pvr_device_info *dev_info,
+   const struct pvr_device_runtime_info *dev_runtime_info,
+   uint32_t msaa_mode,
+   uint32_t pixel_width,
+   bool paired_tiles,
+   uint32_t max_tiles_in_flight,
+   uint32_t *const isp_ctl_out,
+   uint32_t *const pixel_ctl_out)
 {
    uint32_t total_tiles_in_flight = 0;
    uint32_t usable_partition_size;
@@ -347,9 +349,8 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
 
    /* Maximum available partition space for partitions of this size. */
    max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
-   usable_partition_size =
-      MIN2(rogue_get_total_reserved_partition_size(dev_info),
-           partition_size * max_partitions);
+   usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size,
+                                partition_size * max_partitions);
 
    if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
        (1024 * 4 * 4)) {
@@ -371,7 +372,7 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
       MIN2(max_partitions, usable_partition_size / partition_size);
 
    if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
-      max_phantoms = rogue_get_num_phantoms(dev_info);
+      max_phantoms = dev_runtime_info->num_phantoms;
    else if (PVR_HAS_FEATURE(dev_info, roguexe))
       max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
    else
@@ -399,7 +400,7 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
       if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
           PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) !=
              2) {
-         isp_tiles_in_flight /= rogue_get_num_phantoms(dev_info);
+         isp_tiles_in_flight /= dev_runtime_info->num_phantoms;
       }
 
       isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight);
index 56bf818..88a3950 100644 (file)
@@ -140,12 +140,14 @@ void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
                                       uint32_t *const src_format_out,
                                       enum pvr_pbe_gamma *const gamma_out);
 
-void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
-                               uint32_t msaa_mode,
-                               uint32_t pixel_width,
-                               bool paired_tiles,
-                               uint32_t max_tiles_in_flight,
-                               uint32_t *const isp_ctl_out,
-                               uint32_t *const pixel_ctl_out);
+void pvr_setup_tiles_in_flight(
+   const struct pvr_device_info *dev_info,
+   const struct pvr_device_runtime_info *dev_runtime_info,
+   uint32_t msaa_mode,
+   uint32_t pixel_width,
+   bool paired_tiles,
+   uint32_t max_tiles_in_flight,
+   uint32_t *const isp_ctl_out,
+   uint32_t *const pixel_ctl_out);
 
 #endif /* PVR_JOB_COMMON_H */
index ecc5e7e..bc84ea0 100644 (file)
@@ -925,6 +925,8 @@ static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
 {
    const uint32_t pds_data_alignment =
       PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &device->pdevice->dev_runtime_info;
    ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
    uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U];
    struct pvr_pds_fence_program program = { 0 };
@@ -934,7 +936,7 @@ static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
 
    /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
    assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
-            rogue_get_num_phantoms(dev_info) >= 2));
+            dev_runtime_info->num_phantoms >= 2));
 
    pvr_pds_generate_fence_terminate_program(&program,
                                             staging_buffer,
index f60b39e..80949db 100644 (file)
@@ -1298,6 +1298,8 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
 {
    const enum PVRX(CR_ISP_AA_MODE_TYPE)
       isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples);
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &ctx->device->pdevice->dev_runtime_info;
    const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
    uint32_t isp_ctl;
 
@@ -1305,6 +1307,7 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
 
    /* FIXME: pass in the number of samples rather than isp_aa_mode? */
    pvr_setup_tiles_in_flight(dev_info,
+                             dev_runtime_info,
                              isp_aa_mode,
                              job->pixel_output_width,
                              false,
@@ -1340,7 +1343,7 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
 
    if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
        PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
-       rogue_get_num_phantoms(dev_info) > 1 && job->frag_uses_atomic_ops) {
+       dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
       /* Each phantom has its own MCU, so atomicity can only be guaranteed
        * when all work items are processed on the same phantom. This means we
        * need to disable all USCs other than those of the first phantom, which
index 3b62f9d..c33e409 100644 (file)
@@ -199,9 +199,7 @@ struct pvr_physical_device {
    struct pvr_winsys *ws;
    struct pvr_device_info dev_info;
 
-   struct pvr_device_runtime_info {
-      uint32_t core_count;
-   } dev_runtime_info;
+   struct pvr_device_runtime_info dev_runtime_info;
 
    VkPhysicalDeviceMemoryProperties memory;
 
@@ -1272,7 +1270,7 @@ struct pvr_load_op {
 };
 
 uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
-   const struct pvr_device_info *dev_info,
+   const struct pvr_physical_device *pdevice,
    uint32_t fs_common_size,
    uint32_t min_tiles_in_flight);
 
index 6c24ec6..5da06d5 100644 (file)
@@ -26,6 +26,7 @@
 #include <stdint.h>
 #include <xf86drm.h>
 
+#include "hwdef/rogue_hw_utils.h"
 #include "pvr_csb.h"
 #include "pvr_device_info.h"
 #include "pvr_private.h"
@@ -42,6 +43,7 @@
 #include "pvr_winsys.h"
 #include "pvr_winsys_helper.h"
 #include "util/log.h"
+#include "util/macros.h"
 #include "util/os_misc.h"
 #include "vk_log.h"
 
@@ -377,6 +379,125 @@ static void pvr_srv_winsys_destroy(struct pvr_winsys *ws)
    pvr_srv_connection_destroy(fd);
 }
 
+static uint64_t
+pvr_srv_get_min_free_list_size(const struct pvr_device_info *dev_info)
+{
+   uint64_t min_num_pages;
+
+   if (PVR_HAS_FEATURE(dev_info, roguexe)) {
+      if (PVR_HAS_QUIRK(dev_info, 66011))
+         min_num_pages = 40U;
+      else
+         min_num_pages = 25U;
+   } else {
+      min_num_pages = 50U;
+   }
+
+   return min_num_pages << ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+}
+
+static inline uint64_t
+pvr_srv_get_num_phantoms(const struct pvr_device_info *dev_info)
+{
+   return DIV_ROUND_UP(PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U), 4U);
+}
+
+/* Return the total reserved size of partition in dwords. */
+static inline uint64_t pvr_srv_get_total_reserved_partition_size(
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
+   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
+   uint32_t max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
+
+   if (tile_size_x == 16 && tile_size_y == 16) {
+      return tile_size_x * tile_size_y * max_partitions *
+             PVR_GET_FEATURE_VALUE(dev_info,
+                                   usc_min_output_registers_per_pix,
+                                   0);
+   }
+
+   return max_partitions * 1024U;
+}
+
+static inline uint64_t
+pvr_srv_get_reserved_shared_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t common_store_size_in_dwords =
+      PVR_GET_FEATURE_VALUE(dev_info,
+                            common_store_size_in_dwords,
+                            512U * 4U * 4U);
+   uint32_t reserved_shared_size =
+      common_store_size_in_dwords - (256U * 4U) -
+      pvr_srv_get_total_reserved_partition_size(dev_info);
+
+   if (PVR_HAS_QUIRK(dev_info, 44079)) {
+      uint32_t common_store_split_point = (768U * 4U * 4U);
+
+      return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size);
+   }
+
+   return reserved_shared_size;
+}
+
+static inline uint64_t
+pvr_srv_get_max_coeffs(const struct pvr_device_info *dev_info)
+{
+   uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS;
+   uint32_t pending_allocation_shared_regs = 2U * 1024U;
+   uint32_t pending_allocation_coeff_regs = 0U;
+   uint32_t num_phantoms = pvr_srv_get_num_phantoms(dev_info);
+   uint32_t tiles_in_flight =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
+   uint32_t max_coeff_pixel_portion =
+      DIV_ROUND_UP(tiles_in_flight, num_phantoms);
+
+   max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS;
+
+   /* Compute tasks on cores with BRN48492 and without compute overlap may lock
+    * up without two additional lines of coeffs.
+    */
+   if (PVR_HAS_QUIRK(dev_info, 48492) &&
+       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
+      pending_allocation_coeff_regs = 2U * 1024U;
+   }
+
+   if (PVR_HAS_ERN(dev_info, 38748))
+      pending_allocation_shared_regs = 0U;
+
+   if (PVR_HAS_ERN(dev_info, 38020)) {
+      max_coeff_additional_portion +=
+         rogue_max_compute_shared_registers(dev_info);
+   }
+
+   return pvr_srv_get_reserved_shared_size(dev_info) +
+          pending_allocation_coeff_regs -
+          (max_coeff_pixel_portion + max_coeff_additional_portion +
+           pending_allocation_shared_regs);
+}
+
+static inline uint64_t
+pvr_srv_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info)
+{
+   uint32_t available_coeffs_in_dwords = pvr_srv_get_max_coeffs(dev_info);
+
+   if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) &&
+       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
+      /* Driver must not use the 2 reserved lines. */
+      available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2;
+   }
+
+   /* The maximum amount of local memory available to a kernel is the minimum
+    * of the total number of coefficient registers available and the max common
+    * store allocation size which can be made by the CDM.
+    *
+    * If any coeff lines are reserved for tessellation or pixel then we need to
+    * subtract those too.
+    */
+   return MIN2(available_coeffs_in_dwords,
+               ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS);
+}
+
 static int
 pvr_srv_winsys_device_info_init(struct pvr_winsys *ws,
                                 struct pvr_device_info *dev_info,
@@ -396,6 +517,16 @@ pvr_srv_winsys_device_info_init(struct pvr_winsys *ws,
       return ret;
    }
 
+   runtime_info->min_free_list_size = pvr_srv_get_min_free_list_size(dev_info);
+   runtime_info->reserved_shared_size =
+      pvr_srv_get_reserved_shared_size(dev_info);
+   runtime_info->total_reserved_partition_size =
+      pvr_srv_get_total_reserved_partition_size(dev_info);
+   runtime_info->num_phantoms = pvr_srv_get_num_phantoms(dev_info);
+   runtime_info->max_coeffs = pvr_srv_get_max_coeffs(dev_info);
+   runtime_info->cdm_max_local_mem_size_regs =
+      pvr_srv_get_cdm_max_local_mem_size_regs(dev_info);
+
    if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
       result = pvr_srv_get_multicore_info(srv_ws->render_fd,
                                           0,