From d038adca0eae173be0d2082f9b6d7e6d7c8aadf1 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 8 Sep 2016 22:18:47 -0700
Subject: [PATCH] intel/isl: Add support for RGB formats in X and Y-tiled
 memory

Normally, using a non-linear tiling format helps improve cache locality by
ensuring that neighboring pixels are usually close-by in memory.  For RGB
formats, this still sort-of holds, but it can also lead to rather terrible
memory access patterns where a single RGB pixel value crosses a tile
boundary and gets split into two pieces in different 4K pages.  It also
makes for some rather awkward calculations because your tile size is no
longer an even multiple of surface element size.  For these reasons, we
chose to simply never create tiled RGB images in the Vulkan driver.

The GL driver, however, is not so kind so we need to support it somehow.  I
briefly toyed with a couple of different schemes but this is the best one I
could come up with.  The fundamental problem is that a tile no longer
contains an integer number of surface elements.  I briefly considered a
couple other options but found them wanting:

 1) Using floats for the logical tile size.  This leads to potential
    rounding error problems.

 2) When presented with a RGB format, just make the tile 3-times as wide.
    This isn't so nice because now our tiles are no longer power-of-two
    size.  Also, it can force the row_pitch to be larger than needed which,
    while not strictly a problem for ISL, causes incompatibility problems
    with the way the GL driver chooses surface pitches.

The chosen method requires that you pay attention and not just assume that
your tile_info is in the units you think it is.  However, it's nice because
it provides a nice "these are the units" declaration in isl_tile_info
itself.  Previously, the tile_info wasn't usable as a stand-alone structure
because you had to also know the format.  It also forces figuring out how
to deal with inconsistencies between tiling and format back to the caller
which is good because the two different consumers of isl_tile_info really
want to deal with it differently:  Computation of the surface size wants
the fewest number of horizontal tiles possible while get_intratile_offset
is far more concerned with things aligning nicely.

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Chad Versace <chadversary@chromium.org>
---
 src/intel/isl/isl.c | 48 +++++++++++++++++++++++++++++++++++-------------
 src/intel/isl/isl.h | 21 ++++++++++++++++++++-
 2 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index c19ef59..a883aec 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -113,7 +113,16 @@ isl_tiling_get_info(const struct isl_device *dev,
    const uint32_t bs = format_bpb / 8;
    struct isl_extent2d logical_el, phys_B;
 
-   assert(tiling == ISL_TILING_LINEAR || isl_is_pow2(format_bpb));
+   if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
+      /* It is possible to have non-power-of-two formats in a tiled buffer.
+       * The easiest way to handle this is to treat the tile as if it is three
+       * times as wide.  This way no pixel will ever cross a tile boundary.
+       * This really only works on legacy X and Y tiling formats.
+       */
+      assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
+      assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
+      return isl_tiling_get_info(dev, tiling, format_bpb / 3, tile_info);
+   }
 
    switch (tiling) {
    case ISL_TILING_LINEAR:
@@ -209,6 +218,7 @@ isl_tiling_get_info(const struct isl_device *dev,
 
    *tile_info = (struct isl_tile_info) {
       .tiling = tiling,
+      .format_bpb = format_bpb,
       .logical_extent_el = logical_el,
       .phys_extent_B = phys_B,
    };
@@ -1215,14 +1225,19 @@ isl_surf_init_s(const struct isl_device *dev,
       }
       base_alignment = isl_round_up_to_power_of_two(base_alignment);
    } else {
+      assert(fmtl->bpb % tile_info.format_bpb == 0);
+      const uint32_t tile_el_scale = fmtl->bpb / tile_info.format_bpb;
+
       assert(phys_slice0_sa.w % fmtl->bw == 0);
       const uint32_t total_w_el = phys_slice0_sa.width / fmtl->bw;
       const uint32_t total_w_tl =
-         isl_align_div(total_w_el, tile_info.logical_extent_el.width);
+         isl_align_div(total_w_el * tile_el_scale,
+                       tile_info.logical_extent_el.width);
 
       row_pitch = total_w_tl * tile_info.phys_extent_B.width;
       if (row_pitch < info->min_pitch) {
-         row_pitch = isl_align(info->min_pitch, tile_info.phys_extent_B.width);
+         row_pitch = isl_align_npot(info->min_pitch,
+                                    tile_info.phys_extent_B.width);
       }
 
       total_h_el += isl_align_div_npot(pad_bytes, row_pitch);
@@ -1679,14 +1694,6 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
                                    uint32_t *x_offset_el,
                                    uint32_t *y_offset_el)
 {
-   /* This function only really works for power-of-two surfaces.  In
-    * theory, we could make it work for non-power-of-two surfaces by going
-    * to the left until we find a block that is bs-aligned.  The Vulkan
-    * driver doesn't use non-power-of-two tiled surfaces so we'll leave
-    * this unimplemented for now.
-    */
-   assert(tiling == ISL_TILING_LINEAR || isl_is_pow2(bs));
-
    if (tiling == ISL_TILING_LINEAR) {
       *base_address_offset = total_y_offset_el * row_pitch +
                              total_x_offset_el * bs;
@@ -1695,8 +1702,24 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
       return;
    }
 
+   const uint32_t bpb = bs * 8;
+
    struct isl_tile_info tile_info;
-   isl_tiling_get_info(dev, tiling, bs * 8, &tile_info);
+   isl_tiling_get_info(dev, tiling, bpb, &tile_info);
+
+   assert(row_pitch % tile_info.phys_extent_B.width == 0);
+
+   /* For non-power-of-two formats, we need the address to be both tile and
+    * element-aligned.  The easiest way to achieve this is to work with a tile
+    * that is three times as wide as the regular tile.
+    *
+    * The tile info returned by get_tile_info has a logical size that is an
+    * integer number of tile_info.format_bpb size elements.  To scale the
+    * tile, we scale up the physical width and then treat the logical tile
+    * size as if it has bpb size elements.
+    */
+   const uint32_t tile_el_scale = bpb / tile_info.format_bpb;
+   tile_info.phys_extent_B.width *= tile_el_scale;
 
    /* Compute the offset into the tile */
    *x_offset_el = total_x_offset_el % tile_info.logical_extent_el.w;
@@ -1706,7 +1729,6 @@ isl_tiling_get_intratile_offset_el(const struct isl_device *dev,
    uint32_t x_offset_tl = total_x_offset_el / tile_info.logical_extent_el.w;
    uint32_t y_offset_tl = total_y_offset_el / tile_info.logical_extent_el.h;
 
-   assert(row_pitch % tile_info.phys_extent_B.width == 0);
    *base_address_offset =
       y_offset_tl * tile_info.phys_extent_B.h * row_pitch +
       x_offset_tl * tile_info.phys_extent_B.h * tile_info.phys_extent_B.w;
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index 3604fad..d2f0e16 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -728,7 +728,26 @@ struct isl_format_layout {
 struct isl_tile_info {
    enum isl_tiling tiling;
 
-   /** The logical size of the tile in units of surface elements
+   /* The size (in bits per block) of a single surface element
+    *
+    * For surfaces with power-of-two formats, this is the same as
+    * isl_format_layout::bpb.  For non-power-of-two formats it may be smaller.
+    * The logical_extent_el field is in terms of elements of this size.
+    *
+    * For example, consider ISL_FORMAT_R32G32B32_FLOAT for which
+    * isl_format_layout::bpb is 96 (a non-power-of-two).  In this case, none
+    * of the tiling formats can actually hold an integer number of 96-bit
+    * surface elements so isl_tiling_get_info returns an isl_tile_info for a
+    * 32-bit element size.  It is the responsibility of the caller to
+    * recognize that 32 != 96 ad adjust accordingly.  For instance, to compute
+    * the width of a surface in tiles, you would do:
+    *
+    * width_tl = DIV_ROUND_UP(width_el * (format_bpb / tile_info.format_bpb),
+    *                         tile_info.logical_extent_el.width);
+    */
+   uint32_t format_bpb;
+
+   /** The logical size of the tile in units of format_bpb size elements
     *
     * This field determines how a given surface is cut up into tiles.  It is
     * used to compute the size of a surface in tiles and can be used to
-- 
2.7.4