radeonsi: add a fast path for MSAA resolving with RGB -> BGR swizzling
author Marek Olšák <marek.olsak@amd.com>
Mon, 8 Mar 2021 11:26:23 +0000 (06:26 -0500)
committer Marge Bot <eric+marge@anholt.net>
Fri, 19 Mar 2021 16:05:03 +0000 (16:05 +0000)
When an MSAA resolve would require an RGB -> BGR swizzle, which the CB can't
do in a single pass, swap the channel order of the MSAA source on its next
fast clear, so that later resolves to the same format don't have to swizzle.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9615>

src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_clear.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state.c

index f34ecc1..76c1f1e 100644 (file)
@@ -1033,6 +1033,28 @@ static void si_do_CB_resolve(struct si_context *sctx, const struct pipe_blit_inf
    si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */);
 }
 
+static bool resolve_formats_compatible(enum pipe_format src, enum pipe_format dst,
+                                       bool src_swaps_rgb_to_bgr, bool *need_rgb_to_bgr)
+{
+   *need_rgb_to_bgr = false; /* output is always written; only the fallback below sets it */
+   /* Overview: returns true when src is format-compatible with dst, either as-is or
+    * with its R and B channels swapped (util_format_rgb_to_bgr).
+    *
+    * src_swaps_rgb_to_bgr - the caller passes the source texture's swap_rgb_to_bgr flag.
+    * need_rgb_to_bgr      - set to true only when src is incompatible as-is but its
+    *                        swapped variant is compatible with dst.
+    */
+   if (src_swaps_rgb_to_bgr) {
+      /* We must only check the swapped format: when the flag is set, the unswapped
+       * format is never tested. Presumably the source data is already laid out in the
+       * swapped channel order - confirm against the users of swap_rgb_to_bgr.
+       */
+      enum pipe_format swapped_src = util_format_rgb_to_bgr(src);
+      assert(swapped_src); /* a swapped counterpart is expected to exist here */
+      return util_is_format_compatible(util_format_description(swapped_src),
+                                       util_format_description(dst));
+   }
+   /* The source is not swapped: first try the formats exactly as given. */
+   if (util_is_format_compatible(util_format_description(src), util_format_description(dst)))
+      return true;
+   /* Fallback: the formats differ, so test whether swapping R and B in src would make
+    * them compatible. The result is both the output flag and the return value.
+    *
+    * NOTE(review): unlike the branch above, swapped_src is not checked here. Presumably
+    * util_format_rgb_to_bgr returns PIPE_FORMAT_NONE for formats without a swapped
+    * counterpart and the compatibility test then fails - confirm.
+    */
+   enum pipe_format swapped_src = util_format_rgb_to_bgr(src);
+   *need_rgb_to_bgr = util_is_format_compatible(util_format_description(swapped_src),
+                                                util_format_description(dst));
+   return *need_rgb_to_bgr;
+}
+
 static bool do_hardware_msaa_resolve(struct pipe_context *ctx, const struct pipe_blit_info *info)
 {
    struct si_context *sctx = (struct si_context *)ctx;
@@ -1059,19 +1081,22 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, const struct pipe
    if (format == PIPE_FORMAT_R16G16_SNORM)
       format = PIPE_FORMAT_R16A16_SNORM;
 
+   bool need_rgb_to_bgr = false;
+
    /* Check the remaining requirements for hw resolve. */
    if (util_max_layer(info->dst.resource, info->dst.level) == 0 && !info->scissor_enable &&
        (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
-       util_is_format_compatible(util_format_description(info->src.format),
-                                 util_format_description(info->dst.format)) &&
+       resolve_formats_compatible(info->src.format, info->dst.format,
+                                  src->swap_rgb_to_bgr, &need_rgb_to_bgr) &&
        dst_width == info->src.resource->width0 && dst_height == info->src.resource->height0 &&
        info->dst.box.x == 0 && info->dst.box.y == 0 && info->dst.box.width == dst_width &&
        info->dst.box.height == dst_height && info->dst.box.depth == 1 && info->src.box.x == 0 &&
        info->src.box.y == 0 && info->src.box.width == dst_width &&
        info->src.box.height == dst_height && info->src.box.depth == 1 && !dst->surface.is_linear &&
        (!dst->cmask_buffer || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
-      /* Check the last constraint. */
-      if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
+      /* Check the remaining constraints. */
+      if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode ||
+          need_rgb_to_bgr) {
          /* The next fast clear will switch to this mode to
           * get direct hw resolve next time if the mode is
           * different now.
@@ -1082,7 +1107,11 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, const struct pipe
           * destination texture instead, but the more general
           * solution is to implement compute shader resolve.
           */
-         src->last_msaa_resolve_target_micro_mode = dst->surface.micro_tile_mode;
+         if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode)
+            src->last_msaa_resolve_target_micro_mode = dst->surface.micro_tile_mode;
+         if (need_rgb_to_bgr)
+            src->swap_rgb_to_bgr_on_next_clear = true;
+
          goto resolve_to_temp;
       }
 
@@ -1128,6 +1157,8 @@ resolve_to_temp:
    if (!tmp)
       return false;
    stmp = (struct si_texture *)tmp;
+   /* Match the channel order of src. */
+   stmp->swap_rgb_to_bgr = src->swap_rgb_to_bgr;
 
    assert(!stmp->surface.is_linear);
    assert(src->surface.micro_tile_mode == stmp->surface.micro_tile_mode);
index e3bb69c..236e180 100644 (file)
@@ -70,6 +70,9 @@ static bool si_set_clear_color(struct si_texture *tex, enum pipe_format surface_
       uc.ui[0] = color->ui[0];
       uc.ui[1] = color->ui[3];
    } else {
+      if (tex->swap_rgb_to_bgr)
+         surface_format = util_format_rgb_to_bgr(surface_format);
+
       util_pack_color_union(surface_format, &uc, color);
    }
 
@@ -419,6 +422,16 @@ static void si_do_fast_color_clear(struct si_context *sctx, unsigned *buffers,
       /* This is only used for MSAA textures when clearing all layers. */
       si_set_optimal_micro_tile_mode(sctx->screen, tex);
 
+      if (tex->swap_rgb_to_bgr_on_next_clear) {
+         assert(!tex->swap_rgb_to_bgr);
+         assert(tex->buffer.b.b.nr_samples >= 2);
+         tex->swap_rgb_to_bgr = true;
+         tex->swap_rgb_to_bgr_on_next_clear = false;
+
+         /* Update all sampler views and images. */
+         p_atomic_inc(&sctx->screen->dirty_tex_counter);
+      }
+
       /* only supported on tiled surfaces */
       if (tex->surface.is_linear) {
          continue;
index cb52295..daf75fc 100644 (file)
@@ -428,6 +428,16 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture
       state[4] &= C_008F20_PITCH;
       state[4] |= S_008F20_PITCH(pitch - 1);
    }
+
+   if (tex->swap_rgb_to_bgr) {
+      unsigned swizzle_x = G_008F1C_DST_SEL_X(state[3]);
+      unsigned swizzle_z = G_008F1C_DST_SEL_Z(state[3]);
+
+      state[3] &= C_008F1C_DST_SEL_X;
+      state[3] |= S_008F1C_DST_SEL_X(swizzle_z);
+      state[3] &= C_008F1C_DST_SEL_Z;
+      state[3] |= S_008F1C_DST_SEL_Z(swizzle_x);
+   }
 }
 
 static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
index 3673af9..1d1f073 100644 (file)
@@ -355,6 +355,8 @@ struct si_texture {
    unsigned cb_color_info; /* fast clear enable bit */
    unsigned color_clear_value[2];
    unsigned last_msaa_resolve_target_micro_mode;
+   bool swap_rgb_to_bgr_on_next_clear;
+   bool swap_rgb_to_bgr;
    unsigned num_level0_transfers;
    unsigned plane_index; /* other planes are different pipe_resources */
    unsigned num_planes;
index 332ef80..cc4c9f6 100644 (file)
@@ -2995,6 +2995,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
       cb_color_info = cb->cb_color_info | tex->cb_color_info;
       cb_color_attrib = cb->cb_color_attrib;
 
+      if (tex->swap_rgb_to_bgr) {
+         /* Swap R and B channels. */
+         static unsigned rgb_to_bgr[4] = {
+            [V_028C70_SWAP_STD] = V_028C70_SWAP_ALT,
+            [V_028C70_SWAP_ALT] = V_028C70_SWAP_STD,
+            [V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV,
+            [V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV,
+         };
+         unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_color_info)];
+
+         cb_color_info &= C_028C70_COMP_SWAP;
+         cb_color_info |= S_028C70_COMP_SWAP(swap);
+      }
+
       if (cb->base.u.tex.level > 0)
          cb_color_info &= C_028C70_FAST_CLEAR;