anv: add gen9 astc workaround
authorChia-I Wu <olvaffe@gmail.com>
Tue, 17 Oct 2023 19:25:20 +0000 (12:25 -0700)
committerMarge Bot <emma+marge@anholt.net>
Wed, 25 Oct 2023 00:06:04 +0000 (00:06 +0000)
gen9 does not handle denorms in void extent blocks correctly.  We need
to flush them to zero.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25800>

src/intel/vulkan/anv_astc_emu.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_image.c
src/intel/vulkan/anv_private.h

index 90fbd3b..e48f3c8 100644 (file)
@@ -5,6 +5,8 @@
 
 #include "anv_private.h"
 
+#include "compiler/nir/nir_builder.h"
+
 static void
 astc_emu_init_image_view(struct anv_cmd_buffer *cmd_buffer,
                          struct anv_image_view *iview,
@@ -57,6 +59,261 @@ astc_emu_init_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
 }
 
 static void
+astc_emu_init_flush_denorm_shader(nir_builder *b)
+{
+   b->shader->info.workgroup_size[0] = 8;
+   b->shader->info.workgroup_size[1] = 8;
+
+   const struct glsl_type *src_type =
+      glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT);
+   nir_variable *src_var =
+      nir_variable_create(b->shader, nir_var_uniform, src_type, "src");
+   src_var->data.descriptor_set = 0;
+   src_var->data.binding = 0;
+
+   const struct glsl_type *dst_type =
+      glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_UINT);
+   nir_variable *dst_var =
+      nir_variable_create(b->shader, nir_var_uniform, dst_type, "dst");
+   dst_var->data.descriptor_set = 0;
+   dst_var->data.binding = 1;
+
+   nir_def *zero = nir_imm_int(b, 0);
+   nir_def *consts = nir_load_push_constant(b, 4, 32, zero, .range = 16);
+   nir_def *offset = nir_channels(b, consts, 0x3);
+   nir_def *extent = nir_channels(b, consts, 0x3 << 2);
+
+   nir_def *coord = nir_load_global_invocation_id(b, 32);
+   coord = nir_iadd(b, nir_channels(b, coord, 0x3), offset);
+
+   nir_def *cond = nir_ilt(b, coord, extent);
+   cond = nir_iand(b, nir_channel(b, cond, 0), nir_channel(b, cond, 1));
+   nir_push_if(b, cond);
+   {
+      const struct glsl_type *val_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
+      nir_variable *val_var =
+         nir_variable_create(b->shader, nir_var_shader_temp, val_type, "val");
+
+      coord = nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),
+                       zero);
+      nir_def *val =
+         nir_txf_deref(b, nir_build_deref_var(b, src_var), coord, zero);
+      nir_store_var(b, val_var, val, 0xf);
+
+      /* A void-extent block has this layout
+       *
+       *   struct astc_void_extent_block {
+       *      uint16_t header;
+       *      uint16_t dontcare0;
+       *      uint16_t dontcare1;
+       *      uint16_t dontcare2;
+       *      uint16_t R;
+       *      uint16_t G;
+       *      uint16_t B;
+       *      uint16_t A;
+       *   };
+       *
+       * where the lower 12 bits are 0xdfc for 2D LDR.
+       */
+      nir_def *block_mode = nir_iand_imm(b, nir_channel(b, val, 0), 0xfff);
+      nir_push_if(b, nir_ieq_imm(b, block_mode, 0xdfc));
+      {
+         nir_def *color = nir_channels(b, val, 0x3 << 2);
+         nir_def *comps = nir_unpack_64_4x16(b, nir_pack_64_2x32(b, color));
+
+         /* flush denorms */
+         comps = nir_bcsel(b, nir_ult_imm(b, comps, 4),
+                           nir_imm_intN_t(b, 0, 16), comps);
+
+         color = nir_unpack_64_2x32(b, nir_pack_64_4x16(b, comps));
+         val = nir_vec4(b, nir_channel(b, val, 0), nir_channel(b, val, 1),
+                        nir_channel(b, color, 0), nir_channel(b, color, 1));
+         nir_store_var(b, val_var, val, 0x3 << 2);
+      }
+      nir_pop_if(b, NULL);
+
+      nir_def *dst = &nir_build_deref_var(b, dst_var)->def;
+      coord = nir_pad_vector(b, coord, 4);
+      val = nir_load_var(b, val_var);
+      nir_image_deref_store(b, dst, coord, nir_undef(b, 1, 32), val, zero,
+                            .image_dim = GLSL_SAMPLER_DIM_2D,
+                            .image_array = true);
+   }
+   nir_pop_if(b, NULL);
+}
+
+static VkResult
+astc_emu_init_flush_denorm_pipeline_locked(struct anv_device *device)
+{
+   struct anv_device_astc_emu *astc_emu = &device->astc_emu;
+   VkDevice _device = anv_device_to_handle(device);
+   VkResult result = VK_SUCCESS;
+
+   if (astc_emu->ds_layout == VK_NULL_HANDLE) {
+      const VkDescriptorSetLayoutCreateInfo ds_layout_create_info = {
+         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+         .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+         .bindingCount = 2,
+         .pBindings = (VkDescriptorSetLayoutBinding[]){
+            {
+               .binding = 0,
+               .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+               .descriptorCount = 1,
+               .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            },
+            {
+               .binding = 1,
+               .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+               .descriptorCount = 1,
+               .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            },
+         },
+      };
+      result = anv_CreateDescriptorSetLayout(_device, &ds_layout_create_info,
+                                             NULL, &astc_emu->ds_layout);
+      if (result != VK_SUCCESS)
+         goto out;
+   }
+
+   if (astc_emu->pipeline_layout == VK_NULL_HANDLE) {
+      const VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
+         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+         .setLayoutCount = 1,
+         .pSetLayouts = &astc_emu->ds_layout,
+         .pushConstantRangeCount = 1,
+         .pPushConstantRanges = &(VkPushConstantRange){
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .size = sizeof(uint32_t) * 4,
+         },
+      };
+      result = anv_CreatePipelineLayout(_device, &pipeline_layout_create_info,
+                                        NULL, &astc_emu->pipeline_layout);
+      if (result != VK_SUCCESS)
+         goto out;
+   }
+
+   if (astc_emu->pipeline == VK_NULL_HANDLE) {
+      const struct nir_shader_compiler_options *options =
+         device->physical->compiler->nir_options[MESA_SHADER_COMPUTE];
+      nir_builder b = nir_builder_init_simple_shader(
+            MESA_SHADER_COMPUTE, options, "astc_emu_flush_denorm");
+      astc_emu_init_flush_denorm_shader(&b);
+
+      const VkComputePipelineCreateInfo pipeline_create_info = {
+         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+         .stage =
+            (VkPipelineShaderStageCreateInfo){
+               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+               .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+               .module = vk_shader_module_handle_from_nir(b.shader),
+               .pName = "main",
+            },
+         .layout = astc_emu->pipeline_layout,
+      };
+      result = anv_CreateComputePipelines(_device, VK_NULL_HANDLE, 1,
+                                          &pipeline_create_info, NULL,
+                                          &astc_emu->pipeline);
+      ralloc_free(b.shader);
+
+      if (result != VK_SUCCESS)
+         goto out;
+   }
+
+out:
+   return result;
+}
+
+static VkResult
+astc_emu_init_flush_denorm_pipeline(struct anv_device *device)
+{
+   struct anv_device_astc_emu *astc_emu = &device->astc_emu;
+   VkResult result = VK_SUCCESS;
+
+   simple_mtx_lock(&astc_emu->mutex);
+   if (!astc_emu->pipeline)
+      result = astc_emu_init_flush_denorm_pipeline_locked(device);
+   simple_mtx_unlock(&astc_emu->mutex);
+
+   return result;
+}
+
+static void
+astc_emu_flush_denorm_slice(struct anv_cmd_buffer *cmd_buffer,
+                            VkFormat astc_format,
+                            VkImageLayout layout,
+                            VkImageView src_view,
+                            VkImageView dst_view,
+                            VkRect2D rect)
+{
+   struct anv_device *device = cmd_buffer->device;
+   struct anv_device_astc_emu *astc_emu = &device->astc_emu;
+   VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);
+
+   VkResult result = astc_emu_init_flush_denorm_pipeline(device);
+   if (result != VK_SUCCESS) {
+      anv_batch_set_error(&cmd_buffer->batch, result);
+      return;
+   }
+
+   const uint32_t push_const[] = {
+      rect.offset.x,
+      rect.offset.y,
+      rect.offset.x + rect.extent.width,
+      rect.offset.y + rect.extent.height,
+   };
+
+   const VkWriteDescriptorSet set_writes[] = {
+      {
+         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+         .dstBinding = 0,
+         .descriptorCount = 1,
+         .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+         .pImageInfo = &(VkDescriptorImageInfo){
+            .imageView = src_view,
+            .imageLayout = layout,
+         },
+      },
+      {
+         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+         .dstBinding = 1,
+         .descriptorCount = 1,
+         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+         .pImageInfo = &(VkDescriptorImageInfo){
+            .imageView = dst_view,
+            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+         },
+      },
+   };
+   struct anv_push_descriptor_set push_set;
+   astc_emu_init_push_descriptor_set(cmd_buffer,
+                                     &push_set,
+                                     astc_emu->ds_layout,
+                                     ARRAY_SIZE(set_writes),
+                                     set_writes);
+   VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set);
+
+   anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE,
+                       astc_emu->pipeline);
+   anv_CmdPushConstants(cmd_buffer_, astc_emu->pipeline_layout,
+                        VK_SHADER_STAGE_COMPUTE_BIT, 0,
+                        sizeof(push_const), push_const);
+   anv_CmdBindDescriptorSets(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE,
+                             astc_emu->pipeline_layout, 0, 1, &set,
+                             0, NULL);
+
+   /* each workgroup processes 8x8 texel blocks */
+   rect.extent.width = DIV_ROUND_UP(rect.extent.width, 8);
+   rect.extent.height = DIV_ROUND_UP(rect.extent.height, 8);
+
+   anv_genX(device->info, CmdDispatchBase)(cmd_buffer_, 0, 0, 0,
+                                           rect.extent.width,
+                                           rect.extent.height,
+                                           1);
+
+   anv_push_descriptor_set_finish(&push_set);
+}
+
+static void
 astc_emu_decompress_slice(struct anv_cmd_buffer *cmd_buffer,
                           VkFormat astc_format,
                           VkImageLayout layout,
@@ -128,6 +385,9 @@ anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
                      VkOffset3D block_offset,
                      VkExtent3D block_extent)
 {
+   const bool flush_denorms =
+      cmd_buffer->device->physical->flush_astc_ldr_void_extent_denorms;
+
    assert(image->emu_plane_format != VK_FORMAT_UNDEFINED);
 
    const VkRect2D rect = {
@@ -165,14 +425,22 @@ anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
                                VK_IMAGE_USAGE_SAMPLED_BIT,
                                subresource->mipLevel, slice_base + i);
       astc_emu_init_image_view(cmd_buffer, &dst_view, image,
-                               VK_FORMAT_R8G8B8A8_UINT,
+                               flush_denorms ? VK_FORMAT_R32G32B32A32_UINT
+                                             : VK_FORMAT_R8G8B8A8_UINT,
                                VK_IMAGE_USAGE_STORAGE_BIT,
                                subresource->mipLevel, slice_base + i);
 
-      astc_emu_decompress_slice(cmd_buffer, image->vk.format, layout,
-                                anv_image_view_to_handle(&src_view),
-                                anv_image_view_to_handle(&dst_view),
-                                rect);
+      if (flush_denorms) {
+         astc_emu_flush_denorm_slice(cmd_buffer, image->vk.format, layout,
+                                     anv_image_view_to_handle(&src_view),
+                                     anv_image_view_to_handle(&dst_view),
+                                     rect);
+      } else {
+         astc_emu_decompress_slice(cmd_buffer, image->vk.format, layout,
+                                   anv_image_view_to_handle(&src_view),
+                                   anv_image_view_to_handle(&dst_view),
+                                   rect);
+      }
    }
 
    anv_cmd_buffer_restore_state(cmd_buffer, &saved);
@@ -184,6 +452,9 @@ anv_device_init_astc_emu(struct anv_device *device)
    struct anv_device_astc_emu *astc_emu = &device->astc_emu;
    VkResult result = VK_SUCCESS;
 
+   if (device->physical->flush_astc_ldr_void_extent_denorms)
+      simple_mtx_init(&astc_emu->mutex, mtx_plain);
+
    if (device->physical->emu_astc_ldr) {
       result = vk_texcompress_astc_init(&device->vk, &device->vk.alloc,
                                         VK_NULL_HANDLE,
@@ -198,6 +469,15 @@ anv_device_finish_astc_emu(struct anv_device *device)
 {
    struct anv_device_astc_emu *astc_emu = &device->astc_emu;
 
+   if (device->physical->flush_astc_ldr_void_extent_denorms) {
+      VkDevice _device = anv_device_to_handle(device);
+
+      anv_DestroyPipeline(_device, astc_emu->pipeline, NULL);
+      anv_DestroyPipelineLayout(_device, astc_emu->pipeline_layout, NULL);
+      anv_DestroyDescriptorSetLayout(_device, astc_emu->ds_layout, NULL);
+      simple_mtx_destroy(&astc_emu->mutex);
+   }
+
    if (astc_emu->texcompress) {
       vk_texcompress_astc_finish(&device->vk, &device->vk.alloc,
                                  astc_emu->texcompress);
index 2904216..055f9a7 100644 (file)
@@ -1362,6 +1362,10 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
    if (!device->has_astc_ldr &&
        driQueryOptionb(&device->instance->dri_options, "vk_require_astc"))
       device->emu_astc_ldr = true;
+   if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) {
+      device->flush_astc_ldr_void_extent_denorms =
+         device->has_astc_ldr && !device->emu_astc_ldr;
+   }
 
    result = anv_physical_device_init_heaps(device, fd);
    if (result != VK_SUCCESS)
index 24d27b5..6ffe7cb 100644 (file)
@@ -2982,14 +2982,21 @@ anv_image_fill_surface_state(struct anv_device *device,
    uint32_t plane = anv_image_aspect_to_plane(image, aspect);
    if (image->emu_plane_format != VK_FORMAT_UNDEFINED) {
       const uint16_t view_bpb = isl_format_get_layout(view_in->format)->bpb;
-      enum isl_format format =
-         image->planes[plane].primary_surface.isl.format;
+      const uint16_t plane_bpb = isl_format_get_layout(
+            image->planes[plane].primary_surface.isl.format)->bpb;
 
-      /* redirect to the hidden plane if not size-compatible */
-      if (isl_format_get_layout(format)->bpb != view_bpb) {
+      /* We should redirect to the hidden plane when the original view format
+       * is compressed or when the view usage is storage.  But we don't always
+       * have visibility to the original view format so we also check for size
+       * compatibility.
+       */
+      if (isl_format_is_compressed(view_in->format) ||
+          (view_usage & ISL_SURF_USAGE_STORAGE_BIT) ||
+          view_bpb != plane_bpb) {
          plane = image->n_planes;
-         format = image->planes[plane].primary_surface.isl.format;
-         assert(isl_format_get_layout(format)->bpb == view_bpb);
+         assert(isl_format_get_layout(
+                  image->planes[plane].primary_surface.isl.format)->bpb ==
+                view_bpb);
       }
    }
 
index 83f9851..15a7461 100644 (file)
@@ -891,6 +891,8 @@ struct anv_physical_device {
 
     /** True if HW supports ASTC LDR */
     bool                                        has_astc_ldr;
+    /** True if denorms in void extents should be flushed to zero */
+    bool                                        flush_astc_ldr_void_extent_denorms;
     /** True if ASTC LDR is supported via emulation */
     bool                                        emu_astc_ldr;
 
@@ -1462,6 +1464,12 @@ enum anv_rt_bvh_build_method {
 
 struct anv_device_astc_emu {
     struct vk_texcompress_astc_state           *texcompress;
+
+    /* for flush_astc_ldr_void_extent_denorms */
+    simple_mtx_t mutex;
+    VkDescriptorSetLayout ds_layout;
+    VkPipelineLayout pipeline_layout;
+    VkPipeline pipeline;
 };
 
 struct anv_device {
@@ -4474,6 +4482,14 @@ vk_format_from_android(unsigned android_format, unsigned android_usage);
 static inline VkFormat
 anv_get_emulation_format(const struct anv_physical_device *pdevice, VkFormat format)
 {
+   if (pdevice->flush_astc_ldr_void_extent_denorms) {
+      const struct util_format_description *desc =
+         vk_format_description(format);
+      if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC &&
+          desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB)
+         return format;
+   }
+
    if (pdevice->emu_astc_ldr)
       return vk_texcompress_astc_emulation_format(format);
 
@@ -4598,7 +4614,7 @@ struct anv_image {
 
    /**
     * If not UNDEFINED, image has a hidden plane at planes[n_planes] for ASTC
-    * LDR emulation.
+    * LDR workaround or emulation.
     */
    VkFormat emu_plane_format;