asahi: Implement custom border colours
author Alyssa Rosenzweig <alyssa@rosenzweig.io>
Sat, 7 Jan 2023 21:49:27 +0000 (16:49 -0500)
committer Alyssa Rosenzweig <alyssa@rosenzweig.io>
Sat, 4 Feb 2023 15:37:02 +0000 (10:37 -0500)
Implement custom border colours, as required by OpenGL's CLAMP_TO_BORDER and
Vulkan with customBorderColor. This uses an extended sampler descriptor, which
has space for the custom border values. The trouble is that the border must be
packed into an internal interchange format that depends on the original format
in a complex way. That said, we're not solving NP-complete problems here, and it
passes the tests (dEQP-GLES31.functional.texture.border_clamp.* and piglit
texwrap).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20570>

src/asahi/lib/agx_border.c [new file with mode: 0644]
src/asahi/lib/agx_formats.h
src/asahi/lib/meson.build
src/gallium/drivers/asahi/agx_pipe.c
src/gallium/drivers/asahi/agx_state.c
src/gallium/drivers/asahi/agx_state.h

diff --git a/src/asahi/lib/agx_border.c b/src/asahi/lib/agx_border.c
new file mode 100644 (file)
index 0000000..0cbf474
--- /dev/null
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2022 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "util/format/format_utils.h"
+#include "util/format/u_format.h"
+#include "util/half_float.h"
+#include "agx_formats.h"
+#include "agx_pack.h"
+
+/*
+ * AGX allows the sampler descriptor to specify a custom border colour. The
+ * packing depends on the texture format (i.e. no
+ * customBorderColorWithoutFormat).
+ *
+ * Each channel is packed separately into 32-bit words. Pure integers are stored
+ * as-is. Pure floats are extended to 16-bit/32-bit as appropriate. Normalized
+ * formats are encoded as usual, except sRGB gets 4 extra bits.
+ *
+ * The texture descriptor swizzle is applied to the border colour. That swizzle
+ * includes the format swizzle. In effect, we want to encode the border colour
+ * like it would be encoded in memory, and then the swizzles work out
+ * for Vulkan.
+ */
+
+/*
+ * How a single channel of the border colour is encoded. The field order
+ * matters: get_channel_info builds values of this struct with positional
+ * compound literals in the order {type, normalized, size}.
+ */
+struct channel {
+   /* Numeric class of the channel (void, unsigned, signed, float, fixed) */
+   enum util_format_type type;
+   /* For integer types: true if normalized, false if a pure integer */
+   bool normalized;
+   /* Number of bits the channel value is packed to */
+   unsigned size;
+};
+
+/*
+ * Look up how one channel of the border colour has to be encoded for a format.
+ *
+ * format:  format the border colour is being packed for
+ * channel: index of the channel within the format (0..3)
+ *
+ * Returns the type, normalization and bit size used to pack that channel.
+ */
+static struct channel
+get_channel_info(enum pipe_format format, unsigned channel)
+{
+   /* The packing used for compressed formats has no PIPE equivalent, so every
+    * supported compressed format is listed explicitly here.
+    */
+   switch (format) {
+   case PIPE_FORMAT_ETC2_R11_UNORM:
+   case PIPE_FORMAT_ETC2_RG11_UNORM:
+      return (struct channel){
+         .type = UTIL_FORMAT_TYPE_UNSIGNED,
+         .normalized = true,
+         .size = 11,
+      };
+
+   case PIPE_FORMAT_ETC2_R11_SNORM:
+   case PIPE_FORMAT_ETC2_RG11_SNORM:
+      return (struct channel){
+         .type = UTIL_FORMAT_TYPE_SIGNED,
+         .normalized = true,
+         .size = 11,
+      };
+
+   case PIPE_FORMAT_RGTC1_UNORM:
+   case PIPE_FORMAT_RGTC2_UNORM:
+      return (struct channel){
+         .type = UTIL_FORMAT_TYPE_UNSIGNED,
+         .normalized = true,
+         .size = 14,
+      };
+
+   case PIPE_FORMAT_RGTC1_SNORM:
+   case PIPE_FORMAT_RGTC2_SNORM:
+      return (struct channel){
+         .type = UTIL_FORMAT_TYPE_SIGNED,
+         .normalized = true,
+         .size = 14,
+      };
+
+   case PIPE_FORMAT_ETC1_RGB8:
+   case PIPE_FORMAT_ETC2_RGB8:
+   case PIPE_FORMAT_ETC2_RGBA8:
+   case PIPE_FORMAT_ETC2_RGB8A1:
+   case PIPE_FORMAT_BPTC_RGBA_UNORM:
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+   case PIPE_FORMAT_DXT3_RGBA:
+   case PIPE_FORMAT_DXT5_RGBA:
+      return (struct channel){
+         .type = UTIL_FORMAT_TYPE_UNSIGNED,
+         .normalized = true,
+         .size = 8,
+      };
+
+   case PIPE_FORMAT_ETC2_SRGB8:
+   case PIPE_FORMAT_ETC2_SRGBA8:
+   case PIPE_FORMAT_ETC2_SRGB8A1:
+   case PIPE_FORMAT_BPTC_SRGBA:
+   case PIPE_FORMAT_DXT1_SRGB:
+   case PIPE_FORMAT_DXT1_SRGBA:
+   case PIPE_FORMAT_DXT3_SRGBA:
+   case PIPE_FORMAT_DXT5_SRGBA:
+      /* Channel 3 stays at 8 bits, the other channels get the 12-bit sRGB
+       * encoding.
+       */
+      return (struct channel){
+         .type = UTIL_FORMAT_TYPE_UNSIGNED,
+         .normalized = true,
+         .size = (channel != 3) ? 12 : 8,
+      };
+
+   case PIPE_FORMAT_BPTC_RGB_FLOAT:
+   case PIPE_FORMAT_BPTC_RGB_UFLOAT:
+      return (struct channel){
+         .type = UTIL_FORMAT_TYPE_FLOAT,
+         .normalized = false,
+         .size = 16,
+      };
+
+   default:
+      assert(
+         !util_format_is_compressed(format) &&
+         "Other compressed formats must be special cased for border colours."
+         "Add more cases if we have a use case");
+
+      break;
+   }
+
+   /* Uncompressed formats: derive the encoding from the format description */
+   const struct util_format_description *desc = util_format_description(format);
+   const struct util_format_channel_description *c = &desc->channel[channel];
+
+   const bool is_integer = (c->type == UTIL_FORMAT_TYPE_UNSIGNED) ||
+                           (c->type == UTIL_FORMAT_TYPE_SIGNED);
+
+   /* Only channels swizzled to X/Y/Z are treated as sRGB encoded */
+   const bool is_xyz = desc->swizzle[channel] <= PIPE_SWIZZLE_Z;
+   const bool srgb =
+      is_xyz && (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
+
+   if (is_integer) {
+      assert((c->normalized ^ c->pure_integer) &&
+             "no SCALED formats supported for texturing");
+   }
+
+   if (srgb && c->type != UTIL_FORMAT_TYPE_VOID) {
+      assert(c->normalized && c->size == 8 &&
+             c->type == UTIL_FORMAT_TYPE_UNSIGNED &&
+             "only 8-bit unorm supported with sRGB");
+   }
+
+   struct channel info = {
+      .type = c->type,
+      .normalized = c->normalized,
+      .size = c->size,
+   };
+
+   /* sRGB channels are packed with 12 bits instead of the in-memory size */
+   if (srgb)
+      info.size = 12;
+
+   return info;
+}
+
+/*
+ * Encode one channel of the border colour.
+ *
+ * value:   raw 32-bit word of the border colour for this channel; it is
+ *          reinterpreted as a float for normalized and float channels and used
+ *          as an integer for pure integer channels
+ * format:  format the border colour is being packed for
+ * channel: index of the channel within the format
+ *
+ * Returns the encoded channel value.
+ */
+static uint32_t
+pack_channel(uint32_t value, enum pipe_format format, unsigned channel)
+{
+   const struct channel info = get_channel_info(format, channel);
+   const unsigned bits = info.size;
+
+   /* Channels that do not exist in the format are packed as zero */
+   if (info.type == UTIL_FORMAT_TYPE_VOID)
+      return 0;
+
+   if (info.type == UTIL_FORMAT_TYPE_UNSIGNED) {
+      if (!info.normalized)
+         return _mesa_unsigned_to_unsigned(value, bits);
+
+      return _mesa_float_to_unorm(uif(value), bits);
+   }
+
+   if (info.type == UTIL_FORMAT_TYPE_SIGNED) {
+      if (!info.normalized)
+         return _mesa_signed_to_signed(value, bits);
+
+      return _mesa_float_to_snorm(uif(value), bits);
+   }
+
+   if (info.type == UTIL_FORMAT_TYPE_FLOAT) {
+      assert(bits == 32 || bits <= 16);
+
+      /* 32-bit floats are stored as-is, anything smaller becomes a half */
+      if (bits == 32)
+         return value;
+
+      return _mesa_float_to_half(uif(value));
+   }
+
+   if (info.type == UTIL_FORMAT_TYPE_FIXED)
+      unreachable("no FIXED textures");
+
+   unreachable("invalid format type");
+}
+
+/*
+ * Pack a custom border colour into a BORDER descriptor.
+ *
+ * out:    packed descriptor to fill in
+ * in:     border colour as four raw 32-bit words, indexed by logical component
+ *         (i.e. before the format swizzle is applied); pack_channel decides per
+ *         channel whether a word is read as float bits or as an integer
+ * format: format the colour is encoded for, must not be PIPE_FORMAT_NONE
+ */
+void
+agx_pack_border(struct agx_border_packed *out, const uint32_t in[4],
+                enum pipe_format format)
+{
+   assert(format != PIPE_FORMAT_NONE);
+
+   const struct util_format_description *desc = util_format_description(format);
+
+   /* channel_map[c] is the logical component that gets packed into channel c
+    * of the format. Channels that no component maps to keep the default of 0.
+    */
+   uint8_t channel_map[4] = {0};
+
+   /* Determine the in-memory order of the format. That is the inverse of the
+    * format swizzle: swizzle[i] names the channel that logical component i is
+    * read from, so the map is indexed by swizzle[i] and stores i. If a
+    * component is replicated, we use the first component, by looping backwards
+    * and overwriting.
+    */
+   for (int i = 3; i >= 0; --i) {
+      static_assert(PIPE_SWIZZLE_X == 0, "known ordering");
+      static_assert(PIPE_SWIZZLE_W == 3, "known ordering");
+
+      /* Skip constant (0/1) and unused swizzles, they name no channel */
+      if (desc->swizzle[i] <= PIPE_SWIZZLE_W)
+         channel_map[desc->swizzle[i]] = i;
+   }
+
+   agx_pack(out, BORDER, cfg) {
+      cfg.channel_0 = pack_channel(in[channel_map[0]], format, 0);
+      cfg.channel_1 = pack_channel(in[channel_map[1]], format, 1);
+      cfg.channel_2 = pack_channel(in[channel_map[2]], format, 2);
+      cfg.channel_3 = pack_channel(in[channel_map[3]], format, 3);
+   }
+}
index b463418..4944d8c 100644 (file)
@@ -46,4 +46,9 @@ agx_is_valid_pixel_format(enum pipe_format format)
    return ((entry.channels | entry.type) != 0) || entry.renderable;
 }
 
+struct agx_border_packed;
+
+/* Pack the border colour `in` (four raw 32-bit words) into the descriptor
+ * `out`, encoded for `format`. Implemented in agx_border.c, which asserts that
+ * `format` is not PIPE_FORMAT_NONE.
+ */
+void agx_pack_border(struct agx_border_packed *out, const uint32_t in[4],
+                     enum pipe_format format);
+
 #endif
index 4c9e106..054ddd7 100644 (file)
@@ -29,6 +29,7 @@ endif
 
 libasahi_lib_files = files(
   'agx_bo.c',
+  'agx_border.c',
   agx_device,
   'agx_formats.c',
   'agx_meta.c',
index 2a84b84..3efeed6 100644 (file)
@@ -1375,6 +1375,9 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_NIR_IMAGES_AS_DEREF:
       return 0;
 
+   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+      return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
+
    case PIPE_CAP_SUPPORTED_PRIM_MODES:
    case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART:
       return BITFIELD_BIT(PIPE_PRIM_POINTS) | BITFIELD_BIT(PIPE_PRIM_LINES) |
index 964ded4..e321875 100644 (file)
@@ -44,6 +44,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "pipe/p_state.h"
+#include "util/format_srgb.h"
+#include "util/half_float.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_prim.h"
@@ -414,6 +416,29 @@ static const enum agx_compare_func agx_compare_funcs[PIPE_FUNC_ALWAYS + 1] = {
    [PIPE_FUNC_ALWAYS] = AGX_COMPARE_FUNC_ALWAYS,
 };
 
+/*
+ * Translate a depth/stencil border colour format to the format that is used
+ * internally, adjusting the colour where the promotion requires it.
+ *
+ * orig: border colour format requested by the state tracker
+ * c:    border colour, modified in place when a clamp is needed
+ *
+ * Returns the format to pack the border colour with. Formats that are not
+ * handled here are returned unchanged.
+ */
+static enum pipe_format
+fixup_border_zs(enum pipe_format orig, union pipe_color_union *c)
+{
+   const bool is_z24 = (orig == PIPE_FORMAT_Z24_UNORM_S8_UINT) ||
+                       (orig == PIPE_FORMAT_Z24X8_UNORM);
+
+   const bool is_separate_stencil = (orig == PIPE_FORMAT_X24S8_UINT) ||
+                                    (orig == PIPE_FORMAT_X32_S8X24_UINT);
+
+   if (is_z24) {
+      /* Z24 is internally promoted to Z32F via transfer_helper. Being
+       * normalized, Z24 border values should be clamped, but Z32F is not
+       * clamped, so the clamp is applied here.
+       */
+      c->f[0] = SATURATE(c->f[0]);
+      return PIPE_FORMAT_Z32_FLOAT;
+   }
+
+   /* Separate stencil is internally promoted */
+   if (is_separate_stencil)
+      return PIPE_FORMAT_S8_UINT;
+
+   return orig;
+}
+
 static void *
 agx_create_sampler_state(struct pipe_context *pctx,
                          const struct pipe_sampler_state *state)
@@ -445,6 +470,20 @@ agx_create_sampler_state(struct pipe_context *pctx,
       cfg.seamful_cube_maps =
          !(agx_device(pctx->screen)->debug & AGX_DBG_DEQP) ||
          !state->seamless_cube_map;
+
+      if (state->border_color_format != PIPE_FORMAT_NONE) {
+         /* TODO: Optimize to use compact descriptors for black/white borders */
+         so->uses_custom_border = true;
+         cfg.border_colour = AGX_BORDER_COLOUR_CUSTOM;
+      }
+   }
+
+   if (so->uses_custom_border) {
+      union pipe_color_union border = state->border_color;
+      enum pipe_format format =
+         fixup_border_zs(state->border_color_format, &border);
+
+      agx_pack_border(&so->border, border.ui, format);
    }
 
    return so;
@@ -476,6 +515,14 @@ agx_bind_sampler_states(struct pipe_context *pctx, enum pipe_shader_type shader,
 
    ctx->stage[shader].sampler_count =
       util_last_bit(ctx->stage[shader].valid_samplers);
+
+   /* Recalculate whether we need custom borders */
+   ctx->stage[shader].custom_borders = false;
+
+   u_foreach_bit(i, ctx->stage[shader].valid_samplers) {
+      if (ctx->stage[shader].samplers[i]->uses_custom_border)
+         ctx->stage[shader].custom_borders = true;
+   }
 }
 
 /* Channels agree for RGBA but are weird for force 0/1 */
@@ -1527,15 +1574,18 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs,
    struct agx_context *ctx = batch->ctx;
    unsigned nr_textures = ctx->stage[stage].texture_count;
    unsigned nr_samplers = ctx->stage[stage].sampler_count;
+   bool custom_borders = ctx->stage[stage].custom_borders;
 
    struct agx_ptr T_tex = agx_pool_alloc_aligned(
       &batch->pool, AGX_TEXTURE_LENGTH * nr_textures, 64);
 
-   struct agx_ptr T_samp = agx_pool_alloc_aligned(
-      &batch->pool, AGX_SAMPLER_LENGTH * nr_samplers, 64);
+   size_t sampler_length =
+      AGX_SAMPLER_LENGTH + (custom_borders ? AGX_BORDER_LENGTH : 0);
+
+   struct agx_ptr T_samp =
+      agx_pool_alloc_aligned(&batch->pool, sampler_length * nr_samplers, 64);
 
    struct agx_texture_packed *textures = T_tex.cpu;
-   struct agx_sampler_packed *samplers = T_samp.cpu;
 
    /* TODO: Dirty track me to save some CPU cycles and maybe improve caching */
    for (unsigned i = 0; i < nr_textures; ++i) {
@@ -1569,13 +1619,25 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs,
    }
 
    /* TODO: Dirty track me to save some CPU cycles and maybe improve caching */
+   uint8_t *out_sampler = T_samp.cpu;
    for (unsigned i = 0; i < nr_samplers; ++i) {
       struct agx_sampler_state *sampler = ctx->stage[stage].samplers[i];
+      struct agx_sampler_packed *out = (struct agx_sampler_packed *)out_sampler;
 
-      if (sampler)
-         samplers[i] = sampler->desc;
-      else
-         memset(&samplers[i], 0, sizeof(samplers[i]));
+      if (sampler) {
+         *out = sampler->desc;
+
+         if (custom_borders) {
+            memcpy(out_sampler + AGX_SAMPLER_LENGTH, &sampler->border,
+                   AGX_BORDER_LENGTH);
+         } else {
+            assert(!sampler->uses_custom_border && "invalid combination");
+         }
+      } else {
+         memset(out, 0, sampler_length);
+      }
+
+      out_sampler += sampler_length;
    }
 
    struct agx_usc_builder b =
@@ -1891,8 +1953,8 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines,
          cfg.uniform_register_count = ctx->vs->info.push_count;
          cfg.preshader_register_count = ctx->vs->info.nr_preamble_gprs;
          cfg.texture_state_register_count = tex_count;
-         cfg.sampler_state_register_count =
-            agx_translate_sampler_state_count(tex_count, false);
+         cfg.sampler_state_register_count = agx_translate_sampler_state_count(
+            tex_count, ctx->stage[PIPE_SHADER_VERTEX].custom_borders);
       }
       out += AGX_VDM_STATE_VERTEX_SHADER_WORD_0_LENGTH;
 
@@ -2067,14 +2129,15 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines,
 
    if (dirty.fragment_shader) {
       unsigned frag_tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;
+
       agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) {
          cfg.pipeline =
             agx_build_pipeline(batch, ctx->fs, PIPE_SHADER_FRAGMENT),
          cfg.uniform_register_count = ctx->fs->info.push_count;
          cfg.preshader_register_count = ctx->fs->info.nr_preamble_gprs;
          cfg.texture_state_register_count = frag_tex_count;
-         cfg.sampler_state_register_count =
-            agx_translate_sampler_state_count(frag_tex_count, false);
+         cfg.sampler_state_register_count = agx_translate_sampler_state_count(
+            frag_tex_count, ctx->stage[PIPE_SHADER_FRAGMENT].custom_borders);
          cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings;
          cfg.cf_bindings = batch->varyings;
 
index 380f9c2..8c6cddc 100644 (file)
@@ -135,6 +135,9 @@ struct agx_stage {
    struct agx_sampler_state *samplers[PIPE_MAX_SAMPLERS];
    struct agx_sampler_view *textures[PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
+   /* Does any bound sampler require custom border colours? */
+   bool custom_borders;
+
    unsigned sampler_count, texture_count;
    uint32_t valid_samplers;
 };
@@ -341,6 +344,12 @@ struct agx_sampler_state {
 
    /* Prepared descriptor */
    struct agx_sampler_packed desc;
+
+   /* Whether a custom border colour is required */
+   bool uses_custom_border;
+
+   /* Packed custom border colour, or zero if none is required */
+   struct agx_border_packed border;
 };
 
 struct agx_sampler_view {