iris/gen12: Implement programming of pixel pipe hashing tables.
author: Francisco Jerez <currojerez@riseup.net>
Thu, 14 Jan 2021 07:07:22 +0000 (23:07 -0800)
committer: Francisco Jerez <currojerez@riseup.net>
Wed, 24 Feb 2021 05:15:25 +0000 (21:15 -0800)
Straightforward by using the pixel hashing table computation helper
previously introduced, assuming we know the fraction of work that
needs to be submitted to each pixel pipe.  Note that, as far as I am aware, the
hardware maps indices in the table to pixel pipes from largest to
smallest, so it shouldn't be necessary to permute indices based on the
physical IDs of the pixel pipes as we are doing on Gen11.

Improves performance of most non-trivial graphics workloads I've tried
on an 80 EU TGL.  E.g. the following testcases improve performance
significantly with sample size 27 and statistical significance 1%:

  gputest/pixmark_piano:      62.89% ±0.10%
  gputest/pixmark_volplosion: 61.51% ±0.06%
  unigine/valley:             26.72% ±0.25%
  gfxbench/gl_5_high:         24.70% ±0.19%
  unigine/heaven:             23.54% ±0.17%
  steam/csgo:                 22.75% ±4.36%
  gfxbench/gl_manhattan31:    22.43% ±0.29%
  gfxbench/gl_4:              20.92% ±0.35%
  warsow/benchsow:            19.15% ±2.53%
  gfxbench/gl_trex_off:       18.84% ±0.27%

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8749>

src/gallium/drivers/iris/iris_state.c

index fadc9a3..f7cc7ed 100644 (file)
@@ -843,7 +843,7 @@ calculate_pixel_hashing_table(unsigned n, unsigned m,
 
 #if GEN_GEN == 11
 static void
-iris_upload_slice_hashing_state(struct iris_batch *batch)
+gen11_upload_pixel_hashing_tables(struct iris_batch *batch)
 {
    const struct gen_device_info *devinfo = &batch->screen->devinfo;
    assert(devinfo->ppipe_subslices[2] == 0);
@@ -877,6 +877,53 @@ iris_upload_slice_hashing_state(struct iris_batch *batch)
       mode.SliceHashingTableEnable = true;
    }
 }
+#elif GEN_VERSIONx10 == 120
+static void
+gen12_upload_pixel_hashing_tables(struct iris_batch *batch)
+{
+   const struct gen_device_info *devinfo = &batch->screen->devinfo;
+   /* Emit the Gen12 subslice hashing table matching the device's fusing.
+    * ppipes_of[n] counts the pixel pipes that have n active dual subslices.
+    */
+   unsigned ppipes_of[3] = {};
+
+   for (unsigned n = 0; n < ARRAY_SIZE(ppipes_of); n++) {
+      for (unsigned p = 0; p < ARRAY_SIZE(devinfo->ppipe_subslices); p++)
+         ppipes_of[n] += (devinfo->ppipe_subslices[p] == n);
+   }
+
+   /* Gen12 has three pixel pipes, each with 0, 1 or 2 dual subslices. */
+   assert(ppipes_of[0] + ppipes_of[1] + ppipes_of[2] == 3);
+
+   if (ppipes_of[2] == 3 || ppipes_of[0] == 2) {
+      /* All three pixel pipes have the maximum number of active dual
+       * subslices, or there is only one active pixel pipe: Nothing to do.
+       */
+      return;
+   }
+
+   iris_emit_cmd(batch, GENX(3DSTATE_SUBSLICE_HASH_TABLE), p) {
+      p.SliceHashControl[0] = TABLE_0;
+      /* Two-way table: filled only if one pipe has no active dual subslices. */
+      if (ppipes_of[2] == 2 && ppipes_of[0] == 1)
+         calculate_pixel_hashing_table(8, 16, 2, 2, 0, p.TwoWayTableEntry[0]);
+      else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1)
+         calculate_pixel_hashing_table(8, 16, 3, 3, 0, p.TwoWayTableEntry[0]);
+      /* Three-way table: filled for every fusing that is not rejected below. */
+      if (ppipes_of[2] == 2 && ppipes_of[1] == 1)
+         calculate_pixel_hashing_table(8, 16, 5, 4, 0, p.ThreeWayTableEntry[0]);
+      else if (ppipes_of[2] == 2 && ppipes_of[0] == 1)
+         calculate_pixel_hashing_table(8, 16, 2, 2, 0, p.ThreeWayTableEntry[0]);
+      else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1)
+         calculate_pixel_hashing_table(8, 16, 3, 3, 0, p.ThreeWayTableEntry[0]);
+      else
+         unreachable("Illegal fusing.");
+   }
+   /* Set the subslice hashing table enable bit in 3DSTATE_3D_MODE. */
+   iris_emit_cmd(batch, GENX(3DSTATE_3D_MODE), p) {
+      p.SubsliceHashingTableEnable = true;
+   }
+}
 #endif
 
 static void
@@ -1002,7 +1049,11 @@ iris_init_render_context(struct iris_batch *batch)
       iris_emit_lri(batch, CACHE_MODE_0, reg_val);
    }
 
-   iris_upload_slice_hashing_state(batch);
+   gen11_upload_pixel_hashing_tables(batch);
+#endif
+
+#if GEN_VERSIONx10 == 120
+   gen12_upload_pixel_hashing_tables(batch);
 #endif
 
    /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined, so we want to avoid