tu: Support GMEM with layered rendering and multiview
authorConnor Abbott <cwabbott0@gmail.com>
Wed, 2 Nov 2022 16:22:21 +0000 (17:22 +0100)
committerMarge Bot <emma+marge@anholt.net>
Tue, 8 Nov 2022 16:35:02 +0000 (16:35 +0000)
It turns out that this actually is supported. GMEM can hold multiple
layers which are cleared, loaded, and resolved separately. The stride
between layers seems to be implicitly calculated based on the tile size,
and we have to match it when blitting to/from GMEM. One tricky thing is
that we may now only discover that there isn't enough space for GMEM when
computing the tiling config, because the number of framebuffer layers is
not known until we have the framebuffer, and too many framebuffer layers
will exhaust GMEM.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19505>

src/freedreno/vulkan/tu_clear_blit.c
src/freedreno/vulkan/tu_clear_blit.h
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_cmd_buffer.h
src/freedreno/vulkan/tu_device.h
src/freedreno/vulkan/tu_pass.c
src/freedreno/vulkan/tu_pass.h
src/freedreno/vulkan/tu_util.c

index 46b5342..d781344 100644 (file)
@@ -2710,6 +2710,9 @@ static void
 tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                               struct tu_cs *cs,
                               uint32_t attachment,
+                              uint32_t base_layer,
+                              uint32_t layers,
+                              uint32_t layer_mask,
                               VkImageAspectFlags mask,
                               const VkClearValue *value)
 {
@@ -2722,14 +2725,21 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                    A6XX_RB_BLIT_GMEM_MSAA_CNTL(tu_msaa_samples(att->samples)));
 
    enum pipe_format format = tu_vk_format_to_pipe_format(att->format);
-   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
-      if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
-         clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf, tu_attachment_gmem_offset(cmd, att), value);
-      if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
-         clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf, tu_attachment_gmem_offset_stencil(cmd, att), value);
-   } else {
-      clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask),
-                            tu_attachment_gmem_offset(cmd, att), value);
+   for_each_layer(i, layer_mask, layers) {
+      uint32_t layer = i + base_layer;
+      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+         if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+            clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf,
+                                  tu_attachment_gmem_offset(cmd, att, layer), value);
+         }
+         if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+            clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf,
+                                  tu_attachment_gmem_offset_stencil(cmd, att, layer), value);
+         }
+      } else {
+         clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask),
+                               tu_attachment_gmem_offset(cmd, att, layer), value);
+      }
    }
 
    trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples);
@@ -2768,7 +2778,10 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
          if (a == VK_ATTACHMENT_UNUSED)
                continue;
 
-         tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
+         tu_emit_clear_gmem_attachment(cmd, cs, a, rects[i].baseArrayLayer,
+                                       rects[i].layerCount,
+                                       subpass->multiview_mask,
+                                       attachments[j].aspectMask,
                                        &attachments[j].clearValue);
       }
    }
@@ -2946,7 +2959,9 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
    if (!attachment->clear_mask)
       return;
 
-   tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask, value);
+   tu_emit_clear_gmem_attachment(cmd, cs, a, 0, cmd->state.framebuffer->layers,
+                                 attachment->clear_views,
+                                 attachment->clear_mask, value);
 }
 
 static void
@@ -2966,37 +2981,39 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
       .sample_0 = vk_format_is_int(attachment->format) ||
          vk_format_is_depth_or_stencil(attachment->format)));
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
-   if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
-      if (!separate_stencil) {
-         tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
-         tu_cs_emit_qw(cs, iview->depth_base_addr);
-         tu_cs_emit(cs, iview->depth_PITCH);
+   for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) {
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
+      if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+         if (!separate_stencil) {
+            tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
+            tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * i);
+            tu_cs_emit(cs, iview->depth_PITCH);
 
-         tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
-         tu_cs_image_flag_ref(cs, &iview->view, 0);
+            tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
+            tu_cs_image_flag_ref(cs, &iview->view, i);
+         } else {
+            tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
+            tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * i);
+            tu_cs_emit(cs, iview->stencil_PITCH);
+         }
       } else {
-         tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
-         tu_cs_emit_qw(cs, iview->stencil_base_addr);
-         tu_cs_emit(cs, iview->stencil_PITCH);
-      }
-   } else {
-      tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
-      tu_cs_image_ref_2d(cs, &iview->view, 0, false);
+         tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
+         tu_cs_image_ref_2d(cs, &iview->view, i, false);
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
-      tu_cs_image_flag_ref(cs, &iview->view, 0);
-   }
+         tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
+         tu_cs_image_flag_ref(cs, &iview->view, i);
+      }
 
-   if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) {
+      if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) {
+            tu_cs_emit_regs(cs,
+                           A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset_stencil(cmd, attachment, i)));
+      } else {
          tu_cs_emit_regs(cs,
-                        A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset_stencil(cmd, attachment)));
-   } else {
-      tu_cs_emit_regs(cs,
-                     A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset(cmd, attachment)));
-   }
+                        A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset(cmd, attachment, i)));
+      }
 
-   tu6_emit_event_write(cmd, cs, BLIT);
+      tu6_emit_event_write(cmd, cs, BLIT);
+   }
 }
 
 static bool
@@ -3132,6 +3149,7 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
               bool separate_stencil,
               enum pipe_format src_format,
               enum pipe_format dst_format,
+              uint32_t layer,
               uint32_t gmem_offset,
               uint32_t cpp)
 {
@@ -3140,12 +3158,12 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
 
    if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
       if (!separate_stencil) {
-         r2d_dst_depth(cs, iview, 0);
+         r2d_dst_depth(cs, iview, layer);
       } else {
-         r2d_dst_stencil(cs, iview, 0);
+         r2d_dst_stencil(cs, iview, layer);
       }
    } else {
-      r2d_dst(cs, &iview->view, 0, src_format);
+      r2d_dst(cs, &iview->view, layer, src_format);
    }
 
    enum a6xx_format fmt = tu6_format_texture(src_format, TILE6_2).fmt;
@@ -3192,6 +3210,7 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
               enum pipe_format src_format,
               enum pipe_format dst_format,
               const VkRect2D *render_area,
+              uint32_t layer,
               uint32_t gmem_offset,
               uint32_t cpp)
 {
@@ -3213,12 +3232,12 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
 
    if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
       if (!separate_stencil) {
-         r3d_dst_depth(cs, iview, 0);
+         r3d_dst_depth(cs, iview, layer);
       } else {
-         r3d_dst_stencil(cs, iview, 0);
+         r3d_dst_stencil(cs, iview, layer);
       }
    } else {
-      r3d_dst(cs, &iview->view, 0, src_format);
+      r3d_dst(cs, &iview->view, layer, src_format);
    }
 
    r3d_src_gmem(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp);
@@ -3312,6 +3331,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                          struct tu_cs *cs,
                          uint32_t a,
                          uint32_t gmem_a,
+                         uint32_t layers,
+                         uint32_t layer_mask,
                          bool cond_exec_allowed)
 {
    const VkRect2D *render_area = &cmd->state.render_area;
@@ -3389,25 +3410,29 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
       if (store_common || store_separate_stencil)
          tu_disable_draw_states(cmd, cs);
 
-      if (store_common) {
-         store_3d_blit(cmd, cs, iview, dst->samples, false, src_format,
-                       dst_format, render_area, tu_attachment_gmem_offset(cmd, src), src->cpp);
-      }
-      if (store_separate_stencil) {
-         store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
-                       PIPE_FORMAT_S8_UINT, render_area,
-                       tu_attachment_gmem_offset_stencil(cmd, src), src->samples);
+      for_each_layer(i, layer_mask, layers) {
+         if (store_common) {
+            store_3d_blit(cmd, cs, iview, dst->samples, false, src_format,
+                          dst_format, render_area, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);
+         }
+         if (store_separate_stencil) {
+            store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
+                          PIPE_FORMAT_S8_UINT, render_area, i,
+                          tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples);
+         }
       }
    } else {
       r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
 
-      if (store_common) {
-         store_cp_blit(cmd, cs, iview, src->samples, false, src_format,
-                       dst_format, tu_attachment_gmem_offset(cmd, src), src->cpp);
-      }
-      if (store_separate_stencil) {
-         store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT,
-                       PIPE_FORMAT_S8_UINT, tu_attachment_gmem_offset_stencil(cmd, src), src->samples);
+      for_each_layer(i, layer_mask, layers) {
+         if (store_common) {
+            store_cp_blit(cmd, cs, iview, src->samples, false, src_format,
+                          dst_format, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);
+         }
+         if (store_separate_stencil) {
+            store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT,
+                          PIPE_FORMAT_S8_UINT, i, tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples);
+         }
       }
    }
 
index 77a289a..bab3a15 100644 (file)
@@ -56,6 +56,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                          struct tu_cs *cs,
                          uint32_t a,
                          uint32_t gmem_a,
+                         uint32_t layers,
+                         uint32_t layer_mask,
                          bool cond_exec_allowed);
 
 void
index dc5a026..885a14e 100644 (file)
@@ -283,7 +283,7 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
       tu_cs_image_depth_ref(cs, iview, 0);
    else
       tu_cs_image_ref(cs, &iview->view, 0);
-   tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment));
+   tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0));
 
    tu_cs_emit_regs(cs,
                    A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));
@@ -298,10 +298,10 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
       tu_cs_emit(cs, A6XX_RB_STENCIL_INFO(.separate_stencil = true).value);
       if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
          tu_cs_image_stencil_ref(cs, iview, 0);
-         tu_cs_emit(cs, tu_attachment_gmem_offset_stencil(cmd, attachment));
+         tu_cs_emit(cs, tu_attachment_gmem_offset_stencil(cmd, attachment, 0));
       } else {
          tu_cs_image_ref(cs, &iview->view, 0);
-         tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment));
+         tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0));
       }
    } else {
       tu_cs_emit_regs(cs,
@@ -347,7 +347,7 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
       tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
       tu_cs_emit(cs, iview->view.RB_MRT_BUF_INFO);
       tu_cs_image_ref(cs, &iview->view, 0);
-      tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a]));
+      tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a], 0));
 
       tu_cs_emit_regs(cs,
                       A6XX_SP_FS_MRT_REG(i, .dword = iview->view.SP_FS_MRT_REG));
@@ -685,7 +685,8 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd,
       return true;
 
    /* can't fit attachments into gmem */
-   if (!cmd->state.pass->gmem_pixels[cmd->state.gmem_layout])
+   if (!cmd->state.pass->gmem_pixels[cmd->state.gmem_layout] ||
+       !cmd->state.tiling->possible)
       return true;
 
    if (cmd->state.framebuffer->layers > 1)
@@ -863,6 +864,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
    const struct tu_render_pass *pass = cmd->state.pass;
    const struct tu_subpass *subpass = &pass->subpasses[pass->subpass_count-1];
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
 
    tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
    tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
@@ -870,8 +872,11 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu6_emit_blit_scissor(cmd, cs, true);
 
    for (uint32_t a = 0; a < pass->attachment_count; ++a) {
-      if (pass->attachments[a].gmem)
-         tu_store_gmem_attachment(cmd, cs, a, a, cmd->state.tiling->binning_possible);
+      if (pass->attachments[a].gmem) {
+         tu_store_gmem_attachment(cmd, cs, a, a,
+                                  fb->layers, subpass->multiview_mask,
+                                  cmd->state.tiling->binning_possible);
+      }
    }
 
    if (subpass->resolve_attachments) {
@@ -879,7 +884,8 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
          uint32_t a = subpass->resolve_attachments[i].attachment;
          if (a != VK_ATTACHMENT_UNUSED) {
             uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
-            tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
+            tu_store_gmem_attachment(cmd, cs, a, gmem_a, fb->layers,
+                                     subpass->multiview_mask, false);
          }
       }
    }
@@ -1195,7 +1201,7 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
       const struct tu_render_pass_attachment *att =
          &cmd->state.pass->attachments[a];
       uint32_t *dst = &texture.map[A6XX_TEX_CONST_DWORDS * i];
-      uint32_t gmem_offset = tu_attachment_gmem_offset(cmd, att);
+      uint32_t gmem_offset = tu_attachment_gmem_offset(cmd, att, 0);
       uint32_t cpp = att->cpp;
 
       memcpy(dst, iview->view.descriptor, A6XX_TEX_CONST_DWORDS * 4);
@@ -1265,6 +1271,9 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
       dst[2] =
          A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
          A6XX_TEX_CONST_2_PITCH(tiling->tile0.width * cpp);
+      /* Note: it seems the HW implicitly calculates the array pitch with the
+       * GMEM tiling, so we don't need to specify the pitch ourselves.
+       */
       dst[3] = 0;
       dst[4] = cmd->device->physical_device->gmem_base + gmem_offset;
       dst[5] = A6XX_TEX_CONST_5_DEPTH(1);
@@ -4378,6 +4387,7 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
    }
 
    const struct tu_render_pass *pass = cmd->state.pass;
+   const struct tu_framebuffer *fb = cmd->state.framebuffer;
    struct tu_cs *cs = &cmd->draw_cs;
    const struct tu_subpass *last_subpass = cmd->state.subpass;
 
@@ -4405,7 +4415,8 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
 
          uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
 
-         tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
+         tu_store_gmem_attachment(cmd, cs, a, gmem_a, fb->layers,
+                                  subpass->multiview_mask, false);
 
          if (!pass->attachments[a].gmem)
             continue;
index 9b89b71..7164b90 100644 (file)
@@ -609,18 +609,23 @@ extern const struct vk_command_buffer_ops tu_cmd_buffer_ops;
 
 static inline uint32_t
 tu_attachment_gmem_offset(struct tu_cmd_buffer *cmd,
-                          const struct tu_render_pass_attachment *att)
+                          const struct tu_render_pass_attachment *att,
+                          uint32_t layer)
 {
    assert(cmd->state.gmem_layout < TU_GMEM_LAYOUT_COUNT);
-   return att->gmem_offset[cmd->state.gmem_layout];
+   return att->gmem_offset[cmd->state.gmem_layout] +
+      layer * cmd->state.tiling->tile0.width * cmd->state.tiling->tile0.height *
+      att->cpp;
 }
 
 static inline uint32_t
 tu_attachment_gmem_offset_stencil(struct tu_cmd_buffer *cmd,
-                                  const struct tu_render_pass_attachment *att)
+                                  const struct tu_render_pass_attachment *att,
+                                  uint32_t layer)
 {
    assert(cmd->state.gmem_layout < TU_GMEM_LAYOUT_COUNT);
-   return att->gmem_offset_stencil[cmd->state.gmem_layout];
+   return att->gmem_offset_stencil[cmd->state.gmem_layout] +
+      layer * cmd->state.tiling->tile0.width * cmd->state.tiling->tile0.height;
 }
 
 void tu_render_pass_state_merge(struct tu_render_pass_state *dst,
index ff26693..1b63991 100644 (file)
@@ -380,6 +380,9 @@ struct tu_tiling_config {
    /* number of VSC pipes */
    VkExtent2D pipe_count;
 
+   /* Whether using GMEM is even possible with this configuration */
+   bool possible;
+
    /* Whether binning should be used for gmem rendering using this framebuffer. */
    bool binning;
 
index 84c1c30..a9517c7 100644 (file)
@@ -550,20 +550,6 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
 {
    for (enum tu_gmem_layout layout = 0; layout < TU_GMEM_LAYOUT_COUNT;
         layout++) {
-      /* From the VK_KHR_multiview spec:
-       *
-       *    Multiview is all-or-nothing for a render pass - that is, either all
-       *    subpasses must have a non-zero view mask (though some subpasses may
-       *    have only one view) or all must be zero.
-       *
-       * This means we only have to check one of the view masks.
-       */
-      if (pass->subpasses[0].multiview_mask) {
-         /* It seems multiview must use sysmem rendering. */
-         pass->gmem_pixels[layout] = 0;
-         continue;
-      }
-
       /* log2(gmem_align/(tile_align_w*tile_align_h)) */
       uint32_t block_align_shift = 3;
       uint32_t tile_align_w = phys_dev->info->tile_align_w;
@@ -572,14 +558,17 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
 
       /* calculate total bytes per pixel */
       uint32_t cpp_total = 0;
+      uint32_t min_cpp = UINT32_MAX;
       for (uint32_t i = 0; i < pass->attachment_count; i++) {
          struct tu_render_pass_attachment *att = &pass->attachments[i];
          bool cpp1 = (att->cpp == 1);
          if (att->gmem) {
             cpp_total += att->cpp;
+            min_cpp = MIN2(min_cpp, att->cpp);
 
             /* take into account the separate stencil: */
             if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+               min_cpp = MIN2(min_cpp, att->samples);
                cpp1 = (att->samples == 1);
                cpp_total += att->samples;
             }
@@ -596,6 +585,7 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
       }
 
       pass->tile_align_w = tile_align_w;
+      pass->min_cpp = min_cpp;
 
       /* no gmem attachments */
       if (cpp_total == 0) {
index 06f1185..a43288e 100644 (file)
@@ -100,6 +100,7 @@ struct tu_render_pass
    uint32_t subpass_count;
    uint32_t gmem_pixels[TU_GMEM_LAYOUT_COUNT];
    uint32_t tile_align_w;
+   uint32_t min_cpp;
    uint64_t autotune_hash;
 
    /* memory bandwidth costs (in bytes) for gmem / sysmem rendering */
index 9b0b9a4..537bf6e 100644 (file)
@@ -67,11 +67,49 @@ tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
                                     enum tu_gmem_layout gmem_layout)
 {
    const uint32_t tile_align_w = pass->tile_align_w;
-   const uint32_t tile_align_h = dev->physical_device->info->tile_align_h;
+   uint32_t tile_align_h = dev->physical_device->info->tile_align_h;
    const uint32_t max_tile_width = dev->physical_device->info->tile_max_w;
    const uint32_t max_tile_height = dev->physical_device->info->tile_max_h;
    struct tu_tiling_config *tiling = &fb->tiling[gmem_layout];
 
+   /* From the Vulkan 1.3.232 spec, under VkFramebufferCreateInfo:
+    *
+    *   If the render pass uses multiview, then layers must be one and each
+    *   attachment requires a number of layers that is greater than the
+    *   maximum bit index set in the view mask in the subpasses in which it is
+    *   used.
+    */
+
+   uint32_t layers = fb->layers;
+   if (pass->subpasses[0].multiview_mask) {
+      uint32_t view_mask = 0;
+      for (unsigned i = 0; i < pass->subpass_count; i++)
+         view_mask |= pass->subpasses[i].multiview_mask;
+      layers = util_logbase2(view_mask) + 1;
+   }
+
+   /* If there is more than one layer, we need to make sure that the layer
+    * stride is expressible as an offset in RB_BLIT_BASE_GMEM which ignores
+    * the low 12 bits. The layer stride seems to be implicitly calculated from
+    * the tile width and height so we need to adjust one of them.
+    */
+   const uint32_t gmem_align_log2 = 12;
+   const uint32_t gmem_align = 1 << gmem_align_log2;
+   uint32_t min_layer_stride = tile_align_h * tile_align_w * pass->min_cpp;
+   if (layers > 1 && align(min_layer_stride, gmem_align) != min_layer_stride) {
+      /* Make sure that min_layer_stride is a multiple of gmem_align. Because
+       * gmem_align is a power of two and min_layer_stride isn't already a
+       * multiple of gmem_align, this is equivalent to shifting tile_align_h
+       * until the number of 0 bits at the bottom of min_layer_stride is at
+       * least gmem_align_log2.
+       */
+      tile_align_h <<= gmem_align_log2 - (ffs(min_layer_stride) - 1);
+
+      /* Check that we did the math right. */
+      min_layer_stride = tile_align_h * tile_align_w * pass->min_cpp;
+      assert(align(min_layer_stride, gmem_align) == min_layer_stride);
+   }
+
    /* start from 1 tile */
    tiling->tile_count = (VkExtent2D) {
       .width = 1,
@@ -110,16 +148,23 @@ tu_tiling_config_update_tile_layout(struct tu_framebuffer *fb,
          util_align_npot(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
    }
 
+   tiling->possible = true;
+
    /* do not exceed gmem size */
-   while (tiling->tile0.width * tiling->tile0.height > pass->gmem_pixels[gmem_layout]) {
+   while (tiling->tile0.width * tiling->tile0.height * layers > pass->gmem_pixels[gmem_layout]) {
       if (tiling->tile0.width > MAX2(tile_align_w, tiling->tile0.height)) {
          tiling->tile_count.width++;
          tiling->tile0.width =
             util_align_npot(DIV_ROUND_UP(fb->width, tiling->tile_count.width), tile_align_w);
       } else {
-         /* if this assert fails then layout is impossible.. */
-         assert(tiling->tile0.height > tile_align_h);
          tiling->tile_count.height++;
+         if (DIV_ROUND_UP(fb->height, tiling->tile_count.height) < tile_align_h) {
+            /* Tiling is impossible. This may happen when there is more than
+             * one layer.
+             */
+            tiling->possible = false;
+            return;
+         }
          tiling->tile0.height =
             align(DIV_ROUND_UP(fb->height, tiling->tile_count.height), tile_align_h);
       }