From 17add74decb30de6140bbd607b12bb9a5dce760e Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 13 Jan 2021 23:07:22 -0800 Subject: [PATCH] iris/gen12: Implement programming of pixel pipe hashing tables. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Straightforward by using the pixel hashing table computation helper previously introduced, assuming we know the fraction of work that needs to be submitted to each pixel pipe. Note that, as far as I am aware, the hardware maps indices in the table to pixel pipes from largest to smallest, so it shouldn't be necessary to permute indices based on the physical IDs of the pixel pipes as we are doing on Gen11. Improves performance of most non-trivial graphics workloads I've tried on an 80 EU TGL. E.g. the following testcases improve performance significantly with sample size 27 and statistical significance 1%: gputest/pixmark_piano: 62.89% ±0.10% gputest/pixmark_volplosion: 61.51% ±0.06% unigine/valley: 26.72% ±0.25% gfxbench/gl_5_high: 24.70% ±0.19% unigine/heaven: 23.54% ±0.17% steam/csgo: 22.75% ±4.36% gfxbench/gl_manhattan31: 22.43% ±0.29% gfxbench/gl_4: 20.92% ±0.35% warsow/benchsow: 19.15% ±2.53% gfxbench/gl_trex_off: 18.84% ±0.27% Reviewed-by: Jason Ekstrand Part-of: --- src/gallium/drivers/iris/iris_state.c | 55 +++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index fadc9a3..f7cc7ed 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -843,7 +843,7 @@ calculate_pixel_hashing_table(unsigned n, unsigned m, #if GEN_GEN == 11 static void -iris_upload_slice_hashing_state(struct iris_batch *batch) +gen11_upload_pixel_hashing_tables(struct iris_batch *batch) { const struct gen_device_info *devinfo = &batch->screen->devinfo; assert(devinfo->ppipe_subslices[2] == 0); @@ -877,6 +877,53 @@ 
iris_upload_slice_hashing_state(struct iris_batch *batch) mode.SliceHashingTableEnable = true; } } +#elif GEN_VERSIONx10 == 120 +static void +gen12_upload_pixel_hashing_tables(struct iris_batch *batch) +{ + const struct gen_device_info *devinfo = &batch->screen->devinfo; + /* For each n calculate ppipes_of[n], equal to the number of pixel pipes + * present with n active dual subslices. + */ + unsigned ppipes_of[3] = {}; + + for (unsigned n = 0; n < ARRAY_SIZE(ppipes_of); n++) { + for (unsigned p = 0; p < ARRAY_SIZE(devinfo->ppipe_subslices); p++) + ppipes_of[n] += (devinfo->ppipe_subslices[p] == n); + } + + /* Gen12 has three pixel pipes. */ + assert(ppipes_of[0] + ppipes_of[1] + ppipes_of[2] == 3); + + if (ppipes_of[2] == 3 || ppipes_of[0] == 2) { + /* All three pixel pipes have the maximum number of active dual + * subslices, or there is only one active pixel pipe: Nothing to do. + */ + return; + } + + iris_emit_cmd(batch, GENX(3DSTATE_SUBSLICE_HASH_TABLE), p) { + p.SliceHashControl[0] = TABLE_0; + + if (ppipes_of[2] == 2 && ppipes_of[0] == 1) + calculate_pixel_hashing_table(8, 16, 2, 2, 0, p.TwoWayTableEntry[0]); + else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1) + calculate_pixel_hashing_table(8, 16, 3, 3, 0, p.TwoWayTableEntry[0]); + + if (ppipes_of[2] == 2 && ppipes_of[1] == 1) + calculate_pixel_hashing_table(8, 16, 5, 4, 0, p.ThreeWayTableEntry[0]); + else if (ppipes_of[2] == 2 && ppipes_of[0] == 1) + calculate_pixel_hashing_table(8, 16, 2, 2, 0, p.ThreeWayTableEntry[0]); + else if (ppipes_of[2] == 1 && ppipes_of[1] == 1 && ppipes_of[0] == 1) + calculate_pixel_hashing_table(8, 16, 3, 3, 0, p.ThreeWayTableEntry[0]); + else + unreachable("Illegal fusing."); + } + + iris_emit_cmd(batch, GENX(3DSTATE_3D_MODE), p) { + p.SubsliceHashingTableEnable = true; + } +} #endif static void @@ -1002,7 +1049,11 @@ iris_init_render_context(struct iris_batch *batch) iris_emit_lri(batch, CACHE_MODE_0, reg_val); } - iris_upload_slice_hashing_state(batch); + 
gen11_upload_pixel_hashing_tables(batch); +#endif + +#if GEN_VERSIONx10 == 120 + gen12_upload_pixel_hashing_tables(batch); #endif /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined, so we want to avoid -- 2.7.4