v3dv: compute tile granularity for each subpass
author Iago Toral Quiroga <itoral@igalia.com>
Mon, 6 Apr 2020 07:25:28 +0000 (09:25 +0200)
committer Marge Bot <eric+marge@anholt.net>
Tue, 13 Oct 2020 21:21:29 +0000 (21:21 +0000)
We must update our check for whether the render area is tile-aligned for
each subpass, since the hardware will update tile sizes for each RCL.

Fixes:
dEQP-VK.renderpass.suballocation.attachment_allocation.roll.8

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>

src/broadcom/vulkan/v3dv_cmd_buffer.c
src/broadcom/vulkan/v3dv_pass.c
src/broadcom/vulkan/v3dv_private.h

index 2ef264c..ddafc89 100644 (file)
@@ -741,9 +741,9 @@ cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer,
    }
 
    VkExtent2D granularity;
-   VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
-   VkRenderPass _pass = v3dv_render_pass_to_handle(cmd_buffer->state.pass);
-   v3dv_GetRenderAreaGranularity(_device, _pass, &granularity);
+   v3dv_subpass_get_granularity(cmd_buffer->state.pass,
+                                cmd_buffer->state.subpass_idx,
+                                &granularity);
 
    cmd_buffer->state.tile_aligned_render_area =
       clip_rect->offset.x % granularity.width == 0 &&
@@ -934,13 +934,6 @@ v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
       state->dirty |= V3DV_CMD_DIRTY_SCISSOR;
    }
 
-   /* Check if our render area is aligned to tile boundaries */
-   VkExtent2D fb_extent = {
-      .width = framebuffer->width,
-      .height = framebuffer->height
-   };
-   cmd_buffer_update_tile_alignment(cmd_buffer, &state->render_area, &fb_extent);
-
    /* Setup for first subpass */
    v3dv_cmd_buffer_subpass_start(cmd_buffer, 0);
 }
@@ -1725,6 +1718,18 @@ v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
    if (!job)
       return NULL;
 
+   /* Check if our render area is aligned to tile boundaries. We have to do
+    * this in each subpass because the subset of attachments used can change
+    * and with that the tile size selected by the hardware can change too.
+    */
+   assert(state->framebuffer);
+   VkExtent2D fb_extent = {
+      .width = state->framebuffer->width,
+      .height = state->framebuffer->height
+   };
+   cmd_buffer_update_tile_alignment(cmd_buffer, &state->render_area, &fb_extent);
+
+
    /* If we can't use TLB clears then we need to emit draw clears for any
     * LOAD_OP_CLEAR attachments in this subpass now.
     */
index cdaf284..4cf41be 100644 (file)
@@ -204,15 +204,10 @@ v3dv_DestroyRenderPass(VkDevice _device,
 }
 
 void
-v3dv_GetRenderAreaGranularity(VkDevice device,
-                              VkRenderPass renderPass,
-                              VkExtent2D *pGranularity)
+v3dv_subpass_get_granularity(struct v3dv_render_pass *pass,
+                             uint32_t subpass_idx,
+                             VkExtent2D *granularity)
 {
-   V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
-
-   /* Our tile size depends on the max number of color attachments we can
-    * have in any subpass and the maximum bpp across all of them.
-    */
    static const uint8_t tile_sizes[] = {
       64, 64,
       64, 32,
@@ -221,36 +216,59 @@ v3dv_GetRenderAreaGranularity(VkDevice device,
       16, 16,
    };
 
-   /* Find maximum number of color attachments in any subpass */
-   uint32_t max_color_attachment_count = 0;
-   for (unsigned i = 0; i < pass->subpass_count; i++) {
-      max_color_attachment_count = MAX2(max_color_attachment_count,
-                                        pass->subpasses[i].color_count);
-   }
+   /* Our tile size depends on the number of color attachments and the maximum
+    * bpp across them.
+    */
+   assert(subpass_idx >= 0 && subpass_idx < pass->subpass_count);
+   struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
+   const uint32_t color_attachment_count = subpass->color_count;
 
-   /* Find maximum bpp in any color attachment */
    uint32_t max_internal_bpp = 0;
-   for (unsigned i = 0; i < pass->attachment_count; i++) {
-      VkFormat vk_format = pass->attachments[i].desc.format;
-      if (vk_format_is_color(vk_format)) {
-         const struct v3dv_format *format = v3dv_get_format(vk_format);
-         uint32_t internal_type, internal_bpp;
-         v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
-                                                      &internal_type,
-                                                      &internal_bpp);
-         max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
-      }
+   for (uint32_t i = 0; i < color_attachment_count; i++) {
+      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
+      if (attachment_idx == VK_ATTACHMENT_UNUSED)
+         continue;
+      const VkAttachmentDescription *desc =
+         &pass->attachments[attachment_idx].desc;
+      const struct v3dv_format *format = v3dv_get_format(desc->format);
+      uint32_t internal_type, internal_bpp;
+      v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
+                                                   &internal_type,
+                                                   &internal_bpp);
+      max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
    }
 
    uint32_t idx = 0;
-   if (max_color_attachment_count > 2)
+   if (color_attachment_count > 2)
       idx += 2;
-   else if (max_color_attachment_count > 1)
+   else if (color_attachment_count > 1)
       idx += 1;
 
    idx += max_internal_bpp;
 
    assert(idx < ARRAY_SIZE(tile_sizes));
-   *pGranularity = (VkExtent2D) { .width = tile_sizes[idx * 2],
-                                  .height = tile_sizes[idx * 2 + 1] };
+   *granularity = (VkExtent2D) {
+      .width = tile_sizes[idx * 2],
+      .height = tile_sizes[idx * 2 + 1]
+   };
+}
+
+void
+v3dv_GetRenderAreaGranularity(VkDevice device,
+                              VkRenderPass renderPass,
+                              VkExtent2D *pGranularity)
+{
+   V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
+
+   *pGranularity = (VkExtent2D) {
+      .width = 64,
+      .height = 64,
+   };
+
+   for (uint32_t i = 0; i < pass->subpass_count; i++) {
+      VkExtent2D sg;
+      v3dv_subpass_get_granularity(pass, i, &sg);
+      pGranularity->width = MIN2(pGranularity->width, sg.width);
+      pGranularity->height = MIN2(pGranularity->height, sg.height);
+   }
 }
index 6e511cc..97d5940 100644 (file)
@@ -445,6 +445,10 @@ struct v3dv_render_pass {
    struct v3dv_subpass_attachment *subpass_attachments;
 };
 
+void v3dv_subpass_get_granularity(struct v3dv_render_pass *pass,
+                                  uint32_t subpass_idx,
+                                  VkExtent2D *granularity);
+
 struct v3dv_framebuffer {
    uint32_t width;
    uint32_t height;