st/mesa, iris: Add optional CPU-based ASTC void extent denorm flushing
author: Kenneth Graunke <kenneth@whitecape.org>
Wed, 15 Mar 2023 22:19:42 +0000 (15:19 -0700)
committer: Marge Bot <emma+marge@anholt.net>
Fri, 17 Mar 2023 21:30:48 +0000 (21:30 +0000)
Intel Gen9 GPUs have hardware ASTC support, but have a bug where they
don't handle denormalized values in void extent blocks correctly.  This
isn't that hard to work around - on upload, we can detect such blocks,
and flush any denorms to zero.  Because we're altering the data behind
the application's back, and applications can theoretically ask to
download the original unaltered image data, we unfortunately need to
maintain shadow copies of the data.

To make sure that we don't accidentally skip the void-extent flushing
via any fast-upload paths, and support download correctly, we plug this
into the st/mesa compressed texture format fallback paths, which store
a CPU copy of the original image data, and upload altered data.

This is unfortunately common code for what's likely to be a single
driver's issue (on a single generation), but it beats replicating an
entire framework we already have inside the driver.

Fixes dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.*
using iris on Intel Gen9 GPUs.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4167
Reviewed-by: Emma Anholt <emma@anholt.net>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Acked-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21943>

12 files changed:
docs/gallium/screen.rst
src/gallium/auxiliary/util/u_screen.c
src/gallium/drivers/iris/iris_screen.c
src/gallium/include/pipe/p_defines.h
src/intel/ci/iris-amly-fails.txt
src/intel/ci/iris-cml-fails.txt
src/intel/ci/iris-kbl-fails.txt
src/intel/ci/iris-whl-fails.txt
src/mesa/state_tracker/st_cb_texture.c
src/mesa/state_tracker/st_context.c
src/mesa/state_tracker/st_context.h
src/mesa/state_tracker/st_format.c

index cb66016..2ef0c97 100644 (file)
@@ -647,6 +647,7 @@ The integer capabilities:
 * ``PIPE_CAP_ALLOW_GLTHREAD_BUFFER_SUBDATA_OPT``: Whether to allow glthread to convert glBufferSubData to glCopyBufferSubData. This may improve or worsen performance depending on your driver.
 * ``PIPE_CAP_VALIDATE_ALL_DIRTY_STATES`` : Whether state validation must also validate the state changes for resources types used in the previous shader but not in the current shader.
 * ``PIPE_CAP_NULL_TEXTURES`` : Whether the driver supports sampling from NULL textures.
+* ``PIPE_CAP_ASTC_VOID_EXTENTS_NEED_DENORM_FLUSH`` : True if the driver/hardware needs denormalized values in ASTC void extent blocks flushed to zero.
 
 .. _pipe_capf:
 
index c0ed2f0..22c20e0 100644 (file)
@@ -537,6 +537,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen,
 
    case PIPE_CAP_VALIDATE_ALL_DIRTY_STATES:
    case PIPE_CAP_NULL_TEXTURES:
+   case PIPE_CAP_ASTC_VOID_EXTENTS_NEED_DENORM_FLUSH:
       return 0;
 
    default:
index ebf3611..bc938b7 100644 (file)
@@ -434,6 +434,9 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_DEVICE_PROTECTED_CONTEXT:
       return screen->kernel_features & KERNEL_HAS_PROTECTED_CONTEXT;
 
+   case PIPE_CAP_ASTC_VOID_EXTENTS_NEED_DENORM_FLUSH:
+      return devinfo->ver == 9 && !intel_device_info_is_9lp(devinfo);
+
    default:
       return u_pipe_screen_get_param_defaults(pscreen, param);
    }
index 250fde8..b122cc5 100644 (file)
@@ -1019,6 +1019,7 @@ enum pipe_cap
    PIPE_CAP_DEVICE_PROTECTED_CONTEXT,
    PIPE_CAP_ALLOW_GLTHREAD_BUFFER_SUBDATA_OPT,
    PIPE_CAP_NULL_TEXTURES,
+   PIPE_CAP_ASTC_VOID_EXTENTS_NEED_DENORM_FLUSH,
 
    PIPE_CAP_VALIDATE_ALL_DIRTY_STATES,
    PIPE_CAP_LAST,
index 6e2f988..c869fb9 100644 (file)
@@ -1,18 +1,3 @@
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4167
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x10,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x8,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.12x10,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.12x12,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.4x4,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.5x4,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.6x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.6x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x8,Fail
-
 # EGL_BAD_DISPLAY is generated if display is not an EGL display connection
 # eglCreatePixmapSurface(0, 0x0000000000000000, 0x0000000000000000, { EGL_NONE });
 # // 0x0000000000000000 returned
index f64cd97..6a80763 100644 (file)
@@ -1,18 +1,3 @@
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4167
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x10,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x8,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.12x10,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.12x12,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.4x4,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.5x4,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.6x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.6x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x8,Fail
-
 # New CTS failures in 1.3.5.0
 KHR-GL46.polygon_offset_clamp.PolygonOffsetClampMinMax,Fail
 KHR-GL46.polygon_offset_clamp.PolygonOffsetClampZeroInfinity,Fail
index d69edab..04fdf73 100644 (file)
@@ -1,18 +1,3 @@
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4167
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x10,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x8,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.12x10,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.12x12,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.4x4,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.5x4,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.6x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.6x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x8,Fail
-
 # X Error of failed request:  BadMatch (invalid parameter attributes)
 #   Major opcode of failed request:  151 (GLX)
 #   Minor opcode of failed request:  11 (X_GLXSwapBuffers)
index f64cd97..6a80763 100644 (file)
@@ -1,18 +1,3 @@
-# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4167
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x10,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.10x8,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.12x10,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.12x12,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.4x4,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.5x4,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.6x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.6x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x5,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x6,Fail
-dEQP-GLES3.functional.texture.compressed.astc.void_extent_ldr.8x8,Fail
-
 # New CTS failures in 1.3.5.0
 KHR-GL46.polygon_offset_clamp.PolygonOffsetClampMinMax,Fail
 KHR-GL46.polygon_offset_clamp.PolygonOffsetClampZeroInfinity,Fail
index 491342d..f125344 100644 (file)
@@ -429,6 +429,9 @@ st_astc_format_fallback(const struct st_context *st, mesa_format format)
    if (!_mesa_is_format_astc_2d(format))
       return false;
 
+   if (st->astc_void_extents_need_denorm_flush && !util_format_is_srgb(format))
+      return true;
+
    if (format == MESA_FORMAT_RGBA_ASTC_5x5 ||
        format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
       return !st->has_astc_5x5_ldr;
@@ -556,6 +559,64 @@ log_unmap_time_delta(const struct pipe_box *box,
              pathname, os_time_get() - start_us);
 }
 
+/**
+ * Copy one slice of ASTC blocks (16 bytes each) from src to dst, replacing any
+ * R/G/B/A value below 4 with 0 in LDR void-extent blocks; src is only read. */
+static void
+upload_astc_slice_with_flushed_void_extents(uint8_t *dst,
+                                            unsigned dst_stride,
+                                            const uint8_t *src,
+                                            unsigned src_stride,
+                                            unsigned src_width,
+                                           unsigned src_height,
+                                           mesa_format format)
+{
+   unsigned blk_w, blk_h;
+   _mesa_get_format_block_size(format, &blk_w, &blk_h);
+
+   unsigned x_blocks = (src_width + blk_w - 1) / blk_w; /* round up to whole blocks */
+   unsigned y_blocks = (src_height + blk_h - 1) / blk_h;
+
+   for (int y = 0; y < y_blocks; y++) {
+      /* An ASTC block is stored in little endian mode. The byte that
+       * contains bits 0..7 is stored at the lower address in memory.
+       * NOTE(review): the uint16_t overlay below presumably assumes a little-endian host. */
+      struct astc_block {
+         uint16_t header;
+         uint16_t dontcare0;
+         uint16_t dontcare1;
+         uint16_t dontcare2;
+         uint16_t R;
+         uint16_t G;
+         uint16_t B;
+         uint16_t A;
+      } *blocks = (struct astc_block *) src; /* cast drops const; blocks[] is only read */
+
+      /* Walk every block in this row; each one is written to dst, altered or not */
+      for (int x = 0; x < x_blocks; x++) {
+         /* Low 12 header bits equal to 0xDFC mark an LDR void-extent block */
+         if ((blocks[x].header & 0xfff) == 0xDFC) {
+            struct astc_block flushed_block = {
+               .header = blocks[x].header,
+               .dontcare0 = blocks[x].dontcare0,
+               .dontcare1 = blocks[x].dontcare1,
+               .dontcare2 = blocks[x].dontcare2,
+               .R = blocks[x].R < 4 ? 0 : blocks[x].R, /* values 1..3 become 0 */
+               .G = blocks[x].G < 4 ? 0 : blocks[x].G,
+               .B = blocks[x].B < 4 ? 0 : blocks[x].B,
+               .A = blocks[x].A < 4 ? 0 : blocks[x].A,
+            };
+            memcpy(&dst[x * 16], &flushed_block, 16);
+         } else {
+            memcpy(&dst[x * 16], &blocks[x], 16);
+         }
+      }
+
+      dst += dst_stride; /* strides are byte offsets between block rows */
+      src += src_stride;
+   }
+}
+
 void
 st_UnmapTextureImage(struct gl_context *ctx,
                      struct gl_texture_image *texImage,
@@ -577,6 +638,7 @@ st_UnmapTextureImage(struct gl_context *ctx,
          const int64_t unmap_start_us = log_unmap_time ? os_time_get() : 0;
 
          if (_mesa_is_format_astc_2d(texImage->TexFormat) &&
+             !_mesa_is_format_astc_2d(texImage->pt->format) &&
              util_format_is_compressed(texImage->pt->format)) {
 
             /* DXT5 is the only supported transcode target from ASTC. */
@@ -627,7 +689,15 @@ st_UnmapTextureImage(struct gl_context *ctx,
 
          assert(z == transfer->box.z);
 
-         if (util_format_is_compressed(texImage->pt->format)) {
+         if (_mesa_is_format_astc_2d(texImage->pt->format)) {
+            assert(st->astc_void_extents_need_denorm_flush);
+            upload_astc_slice_with_flushed_void_extents(map, transfer->stride,
+                                                        itransfer->temp_data,
+                                                        itransfer->temp_stride,
+                                                        transfer->box.width,
+                                                        transfer->box.height,
+                                                        texImage->pt->format);
+         } else if (util_format_is_compressed(texImage->pt->format)) {
             /* Transcode into a different compressed format. */
             unsigned size =
                _mesa_format_image_size(PIPE_FORMAT_R8G8B8A8_UNORM,
index a60d184..b1d111f 100644 (file)
@@ -566,6 +566,9 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
    st->has_astc_5x5_ldr =
       screen->is_format_supported(screen, PIPE_FORMAT_ASTC_5x5_SRGB,
                                   PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_SAMPLER_VIEW);
+   st->astc_void_extents_need_denorm_flush =
+      screen->get_param(screen, PIPE_CAP_ASTC_VOID_EXTENTS_NEED_DENORM_FLUSH);
+
    st->has_s3tc = screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA,
                                               PIPE_TEXTURE_2D, 0, 0,
                                               PIPE_BIND_SAMPLER_VIEW);
index 3f1d949..ce7d2fd 100644 (file)
@@ -148,6 +148,7 @@ struct st_context
    boolean transcode_astc;
    boolean has_astc_2d_ldr;
    boolean has_astc_5x5_ldr;
+   boolean astc_void_extents_need_denorm_flush;
    boolean has_s3tc;
    boolean has_rgtc;
    boolean has_latc;
index 1803662..2e5e6fa 100644 (file)
@@ -110,6 +110,15 @@ st_mesa_format_to_pipe_format(const struct st_context *st,
    }
 
    if (st_astc_format_fallback(st, mesaFormat)) {
+      const bool is_5x5 = mesaFormat == PIPE_FORMAT_ASTC_5x5 ||
+                          mesaFormat == PIPE_FORMAT_ASTC_5x5_SRGB;
+
+      /* If we're only emulating ASTC void extents, use the original format */
+      if (st->astc_void_extents_need_denorm_flush &&
+          (is_5x5 ? st->has_astc_5x5_ldr : st->has_astc_2d_ldr))
+         return mesaFormat;
+
+      /* We're emulating all of ASTC via transcoding or decompression */
       if (_mesa_is_format_srgb(mesaFormat)) {
          return st->transcode_astc ? PIPE_FORMAT_DXT5_SRGBA :
                                      PIPE_FORMAT_R8G8B8A8_SRGB;