radv: flush L2 for images affected by the pipe misaligned issue on GFX10+
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 18 Jan 2021 10:19:20 +0000 (11:19 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 19 Jan 2021 19:51:44 +0000 (19:51 +0000)
In some rare cases, L2 needs to be flushed if an image is affected
by the pipe misaligned issue. This is roughly based on AMDVLK.

I confirmed that disabling TC-compat HTILE, and respectively DCC,
for the relevant images also fixes the regressions below.

This fixes some regressions introduced with L2 coherency for
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_* and for
dEQP-VK.renderpass2.suballocation.multisample_resolve.*.

Fixes: 4a783a3c784 ("radv: Use L2 coherency on GFX9+.")
Co-Authored-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8557>

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_private.h

index bddbba7..f3ecc94 100644 (file)
@@ -3267,11 +3267,62 @@ static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
        }
 }
 
+/* Determine if the image is affected by the pipe misaligned metadata issue,
+ * which requires invalidating L2.
+ *
+ * Reads chip_class and gb_addr_config from the device's rad_info, and the
+ * sample count, format and array size from the image. Must only be called
+ * on GFX10 or newer (asserted below).
+ *
+ * Returns true for a depth image with TC-compatible HTILE and a non-zero
+ * pipe overlap, or for a color image with DCC or TC-compatible CMASK whose
+ * sample overlap exceeds its sample/fragment difference; false otherwise.
+ */
+static bool
+radv_image_is_pipe_misaligned(const struct radv_device *device,
+                             const struct radv_image *image)
+{
+       struct radeon_info *rad_info = &device->physical_device->rad_info;
+
+       /* Keep every intermediate value signed: the clamps below subtract,
+        * and with unsigned operands a negative difference would wrap to a
+        * huge value that MAX2(0, ...) can never clamp back to zero.
+        */
+       int log2_samples = util_logbase2(image->info.samples);
+       int log2_bpp = util_logbase2(vk_format_get_blocksize(image->vk_format));
+       int log2_bpp_and_samples;
+
+       assert(rad_info->chip_class >= GFX10);
+
+       if (rad_info->chip_class >= GFX10_3) {
+               log2_bpp_and_samples = log2_bpp + log2_samples;
+       } else {
+               if (vk_format_is_depth(image->vk_format) &&
+                    image->info.array_size >= 8) {
+                       log2_bpp = 2;
+               }
+
+               log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
+       }
+
+       int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
+       int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
+
+       if (vk_format_is_depth(image->vk_format)) {
+               if (radv_image_is_tc_compat_htile(image) && overlap) {
+                       return true;
+               }
+       } else {
+               int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
+               int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
+               int samples_overlap = MIN2(log2_samples, overlap);
+
+               /* TODO: It shouldn't be necessary if the image has DCC but
+                * not readable by shader.
+                */
+               if ((radv_image_has_dcc(image) ||
+                    radv_image_is_tc_compat_cmask(image)) &&
+                    (samples_overlap > log2_samples_frag_diff)) {
+                       return true;
+               }
+       }
+
+       return false;
+}
+
 static bool
 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
 {
        if (device->physical_device->rad_info.chip_class >= GFX10) {
-               return !device->physical_device->rad_info.tcc_harvested;
+               return !device->physical_device->rad_info.tcc_harvested &&
+                       (image && !radv_image_is_pipe_misaligned(device, image));
        } else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
                if (image->info.samples == 1 &&
                    (image->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
@@ -3461,11 +3512,28 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
 void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
                          const struct radv_subpass_barrier *barrier)
 {
-       cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask,
-                                                             NULL);
+       struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+       if (fb && !fb->imageless) {
+               for (int i = 0; i < fb->attachment_count; ++i) {
+                       cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask,
+                                                                             fb->attachments[i]->image);
+               }
+       } else {
+               cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask,
+                                                                     NULL);
+       }
+
        radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
-       cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
-                                                             NULL);
+
+       if (fb && !fb->imageless) {
+               for (int i = 0; i < fb->attachment_count; ++i) {
+                       cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
+                                                                             fb->attachments[i]->image);
+               }
+       } else {
+               cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
+                                                                     NULL);
+       }
 }
 
 uint32_t
index 2344bb7..7c37c00 100644 (file)
@@ -7332,6 +7332,8 @@ VkResult radv_CreateFramebuffer(
        framebuffer->width = pCreateInfo->width;
        framebuffer->height = pCreateInfo->height;
        framebuffer->layers = pCreateInfo->layers;
+       framebuffer->imageless = !!imageless_create_info;
+
        if (imageless_create_info) {
                for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) {
                        const VkFramebufferAttachmentImageInfo *attachment =
index be4920d..7b36238 100644 (file)
@@ -2326,6 +2326,8 @@ struct radv_framebuffer {
        uint32_t                                     height;
        uint32_t                                     layers;
 
+       bool                                         imageless;
+
        uint32_t                                     attachment_count;
        struct radv_image_view                       *attachments[0];
 };