pvr: Implement depth/stencil/depth+stencil attachment clear.
authorKarmjit Mahil <Karmjit.Mahil@imgtec.com>
Mon, 26 Sep 2022 15:52:44 +0000 (16:52 +0100)
committerMarge Bot <emma+marge@anholt.net>
Wed, 4 Jan 2023 16:30:39 +0000 (16:30 +0000)
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20055>

src/imagination/vulkan/pvr_blit.c
src/imagination/vulkan/pvr_clear.c
src/imagination/vulkan/pvr_clear.h
src/imagination/vulkan/pvr_cmd_buffer.c
src/imagination/vulkan/pvr_private.h

index 80ed1da..288f6df 100644 (file)
@@ -25,6 +25,7 @@
 #include <stdint.h>
 #include <vulkan/vulkan.h>
 
+#include "pvr_clear.h"
 #include "pvr_csb.h"
 #include "pvr_private.h"
 #include "util/list.h"
@@ -141,13 +142,414 @@ void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
    pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
 }
 
+/**
+ * \brief Returns the maximum number of layers to clear starting from base_layer
+ * that contain or match the target rectangle.
+ *
+ * \param[in] target_rect      The region which the clear should contain or
+ *                             match.
+ * \param[in] base_layer       The layer index to start at.
+ * \param[in] clear_rect_count Amount of clear_rects
+ * \param[in] clear_rects      Array of clear rects.
+ *
+ * \return Max number of layers that cover or match the target region.
+ */
+static uint32_t
+pvr_get_max_layers_covering_target(VkRect2D target_rect,
+                                   uint32_t base_layer,
+                                   uint32_t clear_rect_count,
+                                   const VkClearRect *clear_rects)
+{
+   const int32_t target_x0 = target_rect.offset.x;
+   const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
+   const int32_t target_y0 = target_rect.offset.y;
+   const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;
+
+   uint32_t layer_count = 0;
+
+   assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
+   assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);
+
+   for (uint32_t i = 0; i < clear_rect_count; i++) {
+      const VkClearRect *clear_rect = &clear_rects[i];
+      const uint32_t max_layer =
+         clear_rect->baseArrayLayer + clear_rect->layerCount;
+      bool target_is_covered;
+      int32_t x0, x1;
+      int32_t y0, y1;
+
+      if (clear_rect->baseArrayLayer == 0)
+         continue;
+
+      assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
+             UINT32_MAX);
+
+      /* Check for layer intersection. */
+      if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
+         continue;
+
+      x0 = clear_rect->rect.offset.x;
+      x1 = x0 + (int32_t)clear_rect->rect.extent.width;
+      y0 = clear_rect->rect.offset.y;
+      y1 = y0 + (int32_t)clear_rect->rect.extent.height;
+
+      assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
+      assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
+             INT32_MAX);
+
+      target_is_covered = x0 <= target_x0 && x1 >= target_x1;
+      target_is_covered &= y0 <= target_y0 && y1 >= target_y1;
+
+      if (target_is_covered)
+         layer_count = MAX2(layer_count, max_layer - base_layer);
+   }
+
+   return layer_count;
+}
+
+/* Return true if vertex shader is required to output render target id to pick
+ * the texture array layer.
+ */
+static inline bool
+pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
+                             uint32_t rect_count,
+                             const VkClearRect *rects)
+{
+   if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
+      return false;
+
+   for (uint32_t i = 0; i < rect_count; i++) {
+      if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
+         return true;
+   }
+
+   return false;
+}
+
+static inline uint32_t
+pvr_clear_template_idx_from_aspect(VkImageAspectFlags aspect)
+{
+   switch (aspect) {
+   case VK_IMAGE_ASPECT_COLOR_BIT:
+      /* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
+       *
+       *    "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
+       *    include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
+       *
+       */
+      return PVR_STATIC_CLEAR_COLOR_BIT;
+
+   case VK_IMAGE_ASPECT_DEPTH_BIT:
+      return PVR_STATIC_CLEAR_DEPTH_BIT;
+
+   case VK_IMAGE_ASPECT_STENCIL_BIT:
+      return PVR_STATIC_CLEAR_STENCIL_BIT;
+
+   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
+      return PVR_STATIC_CLEAR_DEPTH_BIT | PVR_STATIC_CLEAR_STENCIL_BIT;
+
+   default:
+      unreachable("Invalid aspect mask for clear.");
+      return 0;
+   }
+}
+
+static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
+                                  uint32_t attachment_count,
+                                  const VkClearAttachment *attachments,
+                                  uint32_t rect_count,
+                                  const VkClearRect *rects)
+{
+   const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
+   struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
+   const struct pvr_renderpass_hwsetup_subpass *hw_pass =
+      pvr_get_hw_subpass(pass, pass_info->subpass_idx);
+   struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
+   struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
+   bool z_replicate = hw_pass->z_replicate != -1;
+   uint32_t vs_output_size_in_bytes;
+   bool vs_has_rt_id_output;
+
+   /* TODO: This function can be optimized so that most of the device memory
+    * gets allocated together in one go and then filled as needed. There might
+    * also be opportunities to reuse pds code and data segments.
+    */
+
+   assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
+
+   pvr_reset_graphics_dirty_state(cmd_buffer, false);
+
+   /* We'll be emitting to the control stream. */
+   sub_cmd->empty_cmd = false;
+
+   vs_has_rt_id_output =
+      pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);
+
+   /* 4 because we're expecting the USC to output X, Y, Z, and W. */
+   vs_output_size_in_bytes = 4 * sizeof(uint32_t);
+   if (vs_has_rt_id_output)
+      vs_output_size_in_bytes += sizeof(uint32_t);
+
+   for (uint32_t i = 0; i < attachment_count; i++) {
+      const VkClearAttachment *attachment = &attachments[i];
+      struct pvr_pds_vertex_shader_program pds_program;
+      struct pvr_pds_upload pds_program_upload = { 0 };
+      uint64_t current_base_array_layer = ~0;
+      VkResult result;
+      float depth;
+
+      if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
+         pvr_finishme("Implement clear for color attachment.");
+      } else if (z_replicate &&
+                 attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+         pvr_finishme("Implement clear for depth/depth+stencil attachment on "
+                      "z_replicate.");
+      } else {
+         struct pvr_static_clear_ppp_template template;
+         uint32_t template_idx;
+         struct pvr_bo *pvr_bo;
+
+         template_idx =
+            pvr_clear_template_idx_from_aspect(attachment->aspectMask);
+         template =
+            cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
+
+         if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+            template.config.ispa.sref =
+               attachment->clearValue.depthStencil.stencil;
+         }
+
+         if (vs_has_rt_id_output) {
+            template.config.output_sel.rhw_pres = true;
+            template.config.output_sel.render_tgt_pres = true;
+            template.config.output_sel.vtxsize = 4 + 1;
+         }
+
+         result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
+                                             &template,
+                                             &pvr_bo);
+         if (result != VK_SUCCESS) {
+            cmd_buffer->state.status = result;
+            return;
+         }
+
+         list_add(&pvr_bo->link, &cmd_buffer->bo_list);
+      }
+
+      if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+         depth = attachment->clearValue.depthStencil.depth;
+      else
+         depth = 1.0f;
+
+      if (vs_has_rt_id_output) {
+         const struct pvr_device_static_clear_state *dev_clear_state =
+            &cmd_buffer->device->static_clear_state;
+         const struct pvr_bo *multi_layer_vert_bo =
+            dev_clear_state->usc_multi_layer_vertex_shader_bo;
+
+         /* We can't use the device's passthrough pds program since it doesn't
+          * have iterate_instance_id enabled. We'll be uploading code sections
+          * per each clear rect.
+          */
+
+         /* TODO: See if we can allocate all the code section memory in one go.
+          * We'd need to make sure that changing instance_id_modifier doesn't
+          * change the code section size.
+          * Also check if we can reuse the same code segment for each rect.
+          * Seems like the instance_id_modifier is written into the data section
+          * and used by the pds ADD instruction that way instead of it being
+          * embedded into the code section.
+          */
+
+         pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
+                                                           multi_layer_vert_bo);
+      } else {
+         /* We can reuse the device's code section but we'll need to upload data
+          * sections so initialize the program.
+          */
+         pvr_pds_clear_vertex_shader_program_init_base(
+            &pds_program,
+            cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);
+
+         pds_program_upload.code_offset =
+            cmd_buffer->device->static_clear_state.pds.code_offset;
+         /* TODO: The code size doesn't get used by pvr_clear_vdm_state() maybe
+          * let's change its interface to make that clear and not set this?
+          */
+         pds_program_upload.code_size =
+            cmd_buffer->device->static_clear_state.pds.code_size;
+      }
+
+      for (uint32_t j = 0; j < rect_count; j++) {
+         struct pvr_pds_upload pds_program_data_upload;
+         const VkClearRect *clear_rect = &rects[j];
+         struct pvr_bo *vertices_bo;
+         uint32_t *vdm_cs_buffer;
+         VkResult result;
+
+         if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
+             (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
+            pvr_finishme("Add deferred RTA clear.");
+
+            if (clear_rect->baseArrayLayer != 0)
+               continue;
+         }
+
+         /* TODO: Allocate all the buffers in one go before the loop, and add
+          * support to multi-alloc bo.
+          */
+         result = pvr_clear_vertices_upload(cmd_buffer->device,
+                                            &clear_rect->rect,
+                                            depth,
+                                            &vertices_bo);
+         if (result != VK_SUCCESS) {
+            cmd_buffer->state.status = result;
+            return;
+         }
+
+         list_add(&vertices_bo->link, &cmd_buffer->bo_list);
+
+         if (vs_has_rt_id_output) {
+            if (current_base_array_layer != clear_rect->baseArrayLayer) {
+               const uint32_t base_array_layer = clear_rect->baseArrayLayer;
+               struct pvr_pds_upload pds_program_code_upload;
+
+               result =
+                  pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
+                     &pds_program,
+                     cmd_buffer,
+                     base_array_layer,
+                     &pds_program_code_upload);
+               if (result != VK_SUCCESS) {
+                  cmd_buffer->state.status = result;
+                  return;
+               }
+
+               pds_program_upload.code_offset =
+                  pds_program_code_upload.code_offset;
+               /* TODO: The code size doesn't get used by pvr_clear_vdm_state()
+                * maybe let's change its interface to make that clear and not
+                * set this?
+                */
+               pds_program_upload.code_size = pds_program_code_upload.code_size;
+
+               current_base_array_layer = base_array_layer;
+            }
+
+            result =
+               pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
+                  &pds_program,
+                  cmd_buffer,
+                  vertices_bo,
+                  &pds_program_data_upload);
+            if (result != VK_SUCCESS)
+               return;
+         } else {
+            result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
+               &pds_program,
+               cmd_buffer,
+               vertices_bo,
+               &pds_program_data_upload);
+            if (result != VK_SUCCESS)
+               return;
+         }
+
+         pds_program_upload.data_offset = pds_program_data_upload.data_offset;
+         pds_program_upload.data_size = pds_program_data_upload.data_size;
+
+         vdm_cs_buffer = pvr_csb_alloc_dwords(&sub_cmd->control_stream,
+                                              PVR_CLEAR_VDM_STATE_DWORD_COUNT);
+         if (!vdm_cs_buffer) {
+            result = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+            cmd_buffer->state.status = result;
+            return;
+         }
+
+         pvr_pack_clear_vdm_state(dev_info,
+                                  &pds_program_upload,
+                                  pds_program.temps_used,
+                                  4,
+                                  vs_output_size_in_bytes,
+                                  clear_rect->layerCount,
+                                  vdm_cs_buffer);
+      }
+   }
+}
+
 void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
                              uint32_t attachmentCount,
                              const VkClearAttachment *pAttachments,
                              uint32_t rectCount,
                              const VkClearRect *pRects)
 {
-   assert(!"Unimplemented");
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+   assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
+
+   /* TODO: There are some optimizations that can be made here:
+    *  - For a full screen clear, update the clear values for the corresponding
+    *    attachment index.
+    *  - For a full screen color attachment clear, add its index to a load op
+    *    override to add it to the background shader. This will elide any load
+    *    op loads currently in the background shader as well as the usual
+    *    frag kick for geometry clear.
+    */
+
+   /* If we have any depth/stencil clears, update the sub command depth/stencil
+    * modification and usage flags.
+    */
+   if (state->depth_format != VK_FORMAT_UNDEFINED) {
+      uint32_t full_screen_clear_count;
+      bool has_stencil_clear = false;
+      bool has_depth_clear = false;
+
+      for (uint32_t i = 0; i < attachmentCount; i++) {
+         const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask;
+
+         if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
+            has_stencil_clear = true;
+
+         if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
+            has_depth_clear = true;
+
+         if (has_stencil_clear && has_depth_clear)
+            break;
+      }
+
+      sub_cmd->modifies_stencil |= has_stencil_clear;
+      sub_cmd->modifies_depth |= has_depth_clear;
+
+      /* We only care about clears that have a baseArrayLayer of 0 as any
+       * attachment clears we move to the background shader must apply to all of
+       * the attachment's sub resources.
+       */
+      full_screen_clear_count =
+         pvr_get_max_layers_covering_target(state->render_pass_info.render_area,
+                                            0,
+                                            rectCount,
+                                            pRects);
+
+      if (full_screen_clear_count > 0) {
+         if (has_stencil_clear &&
+             sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
+            sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
+         }
+
+         if (has_depth_clear &&
+             sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
+            sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
+         }
+      }
+   }
+
+   pvr_clear_attachments(cmd_buffer,
+                         attachmentCount,
+                         pAttachments,
+                         rectCount,
+                         pRects);
 }
 
 void pvr_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
index 5a6e551..5795b03 100644 (file)
@@ -237,10 +237,10 @@ VkResult pvr_emit_ppp_from_template(
  * \param[out] pvr_bo_out  BO upload object.
  * \return VK_SUCCESS if the upload succeeded.
  */
-static VkResult pvr_clear_vertices_upload(struct pvr_device *device,
-                                          const VkRect2D *rect,
-                                          float depth,
-                                          struct pvr_bo **const pvr_bo_out)
+VkResult pvr_clear_vertices_upload(struct pvr_device *device,
+                                   const VkRect2D *rect,
+                                   float depth,
+                                   struct pvr_bo **const pvr_bo_out)
 {
    const float y1 = (float)(rect->offset.y + rect->extent.height);
    const float x1 = (float)(rect->offset.x + rect->extent.width);
index f0c6ddd..826ee7b 100644 (file)
@@ -185,4 +185,9 @@ void pvr_pack_clear_vdm_state(
    uint32_t layer_count,
    uint32_t state_buffer[const static PVR_CLEAR_VDM_STATE_DWORD_COUNT]);
 
+VkResult pvr_clear_vertices_upload(struct pvr_device *device,
+                                   const VkRect2D *rect,
+                                   float depth,
+                                   struct pvr_bo **const pvr_bo_out);
+
 #endif /* PVR_CLEAR_H */
index c9ef4d1..531a82d 100644 (file)
@@ -1636,9 +1636,8 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
    return VK_SUCCESS;
 }
 
-static void
-pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
-                               bool start_geom)
+void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
+                                    bool start_geom)
 {
    struct vk_dynamic_graphics_state *const dynamic_state =
       &cmd_buffer->vk.dynamic_graphics_state;
@@ -2165,7 +2164,7 @@ static VkResult pvr_init_render_targets(struct pvr_device *device,
    return VK_SUCCESS;
 }
 
-static const struct pvr_renderpass_hwsetup_subpass *
+const struct pvr_renderpass_hwsetup_subpass *
 pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
 {
    const struct pvr_renderpass_hw_map *map =
index c1a0b9f..5a4c277 100644 (file)
@@ -1586,6 +1586,12 @@ void pvr_device_destroy_compute_query_programs(struct pvr_device *device);
 VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
                                const struct pvr_query_info *query_info);
 
+void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
+                                    bool start_geom);
+
+const struct pvr_renderpass_hwsetup_subpass *
+pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);
+
 #define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
    VK_FROM_HANDLE(__pvr_type, __name, __handle)