From: Marek Olšák Date: Fri, 4 Sep 2020 09:55:25 +0000 (-0400) Subject: nir,radeonsi: move ffma fusing to late optimizations for better codegen X-Git-Tag: upstream/21.0.0~5382 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=57bf4c2028cffe24ffb55b96592f7e33aa18f1ce;p=platform%2Fupstream%2Fmesa.git nir,radeonsi: move ffma fusing to late optimizations for better codegen The freedreno trace changes were suggested by Rob Clark. ALU performance is higher, because ffma is used more often, but so is register usage, because trinary opcodes (such as ffma) usually need at least 3 live registers. 54793 shaders in 33659 tests Totals: SGPRS: 2639746 -> 2642938 (0.12 %) VGPRS: 1534120 -> 1536392 (0.15 %) Spilled SGPRs: 3541 -> 3618 (2.17 %) Spilled VGPRs: 33 -> 44 (33.33 %) Scratch size: 292 -> 312 (6.85 %) dwords per thread Code Size: 55639836 -> 55620116 (-0.04 %) bytes Max Waves: 964785 -> 963977 (-0.08 %) Totals from affected shaders: SGPRS: 1105800 -> 1108992 (0.29 %) VGPRS: 635292 -> 637564 (0.36 %) Spilled SGPRs: 3193 -> 3270 (2.41 %) Spilled VGPRs: 33 -> 44 (33.33 %) Scratch size: 36 -> 56 (55.56 %) dwords per thread Code Size: 31568708 -> 31548988 (-0.06 %) bytes Max Waves: 319991 -> 319183 (-0.25 %) Reviewed-by: Connor Abbott Part-of: --- diff --git a/.gitlab-ci/traces-freedreno.yml b/.gitlab-ci/traces-freedreno.yml index 163833a..daa72ee 100644 --- a/.gitlab-ci/traces-freedreno.yml +++ b/.gitlab-ci/traces-freedreno.yml @@ -11,12 +11,12 @@ traces: - path: gputest/furmark.trace expectations: - device: freedreno-a630 - checksum: de674022e53fc9e0a9eb217f8bf0fe03 + checksum: af6e1faf11407a7e7c416f2c532de029 # Note: Requires GL3.3 - path: gputest/gimark.trace expectations: - device: freedreno-a630 - checksum: 2cae8e2104356e2b3017cbd953cf7b4a + checksum: 47419914b87422b267e20b6981a7eb43 - path: gputest/pixmark-julia-fp32.trace expectations: - device: freedreno-a630 @@ -37,16 +37,16 @@ traces: expectations: # Looks fine, but totally different shape from the rendering on i965. - device: freedreno-a630 - checksum: 86d678c70b8adf27095ace1a6bbfe2d2 + checksum: 9ee5a036510be0f506705eacc1516bf3 - path: gputest/plot3d.trace expectations: - device: freedreno-a630 - checksum: 67a9eb692e694b11107860bbcd47d493 + checksum: 42aba3ab943dae2fe952cae1ff91c354 # Note: Requires GL4 for tess. - path: gputest/tessmark.trace expectations: - device: freedreno-a630 - checksum: 985e231b58b7dc4b6da34ff32f8ebb82 + checksum: 8688b3904b6b2bc591d8b669ecae4d53 - path: gputest/triangle.trace expectations: - device: freedreno-a630 @@ -149,7 +149,7 @@ traces: - path: glmark2/effect2d-kernel=1,1,1,1,1;1,1,1,1,1;1,1,1,1,1;.rdc expectations: - device: freedreno-a630 - checksum: 2346a6597f4d1f20b493e8d6a8f7e592 + checksum: 2964d37446db126a5fe462b1ba4542cd - path: glmark2/function-fragment-complexity=low:fragment-steps=5.rdc expectations: # Incorrect rendering, a bunch of the area is uniform gray when it should @@ -215,7 +215,7 @@ traces: - path: glmark2/shading-shading=gouraud.rdc expectations: - device: freedreno-a630 - checksum: fcc26fca31375b216382e69bc5f113fb + checksum: bd9058f041bd2d59c039cccdb7d50bf7 - path: glmark2/shading-shading=phong.rdc # Some speckling on the main specular highlight that may just be # mediump artifacts @@ -226,11 +226,6 @@ traces: expectations: - device: freedreno-a630 checksum: d8b5931669733240797f1acf5d98db25 - # Very yellow terrain compared to i965, may just be mediump artifacts. - - path: glmark2/terrain.rdc - expectations: - - device: freedreno-a630 - checksum: 114f7dfe97768d9c565a29f656c8f9cf - path: glmark2/texture-texture-filter=linear.rdc expectations: - device: freedreno-a630 diff --git a/.gitlab-ci/traces-radeonsi.yml b/.gitlab-ci/traces-radeonsi.yml index 111fc63..13b1da5 100644 --- a/.gitlab-ci/traces-radeonsi.yml +++ b/.gitlab-ci/traces-radeonsi.yml @@ -33,11 +33,11 @@ traces: - path: gputest/furmark.trace expectations: - device: gl-radeonsi-stoney - checksum: 1c569668d608c644f353caa177d577c6 + checksum: d71c0d8e6c46c8f29d1aa8d0ed7d3c87 - path: gputest/pixmark-piano.trace expectations: - device: gl-radeonsi-stoney - checksum: a0e1d6358f76666603b08eab383af080 + checksum: 777d48e82cabceef6d9489189f91d266 - path: gputest/triangle.trace expectations: - device: gl-radeonsi-stoney @@ -153,7 +153,7 @@ traces: - path: glmark2/shadow.rdc expectations: - device: gl-radeonsi-stoney - checksum: 4bf5ca9ce641de1031eb8125d80a3005 + checksum: 03dfbf026a0f0ab643e5a6ef19623e81 - path: glmark2/terrain.rdc expectations: - device: gl-radeonsi-stoney @@ -173,7 +173,7 @@ traces: - path: godot/Material Testers.x86_64_2020.04.08_13.38_frame799.rdc expectations: - device: gl-radeonsi-stoney - checksum: 5164e238381e7d77a64e3de771cc005f + checksum: 990abd360dc380c95ee2645f8b402d47 - path: gputest/gimark.trace expectations: - device: gl-radeonsi-stoney @@ -189,15 +189,15 @@ traces: - path: gputest/pixmark-piano.trace expectations: - device: gl-radeonsi-stoney - checksum: a0e1d6358f76666603b08eab383af080 + checksum: 777d48e82cabceef6d9489189f91d266 - path: gputest/pixmark-volplosion.trace expectations: - device: gl-radeonsi-stoney - checksum: 2fba173643c014bcfa4b31eb55a514b9 + checksum: 708f92a8ac8aef23a4a544cc5ec755d6 - path: gputest/plot3d.trace expectations: - device: gl-radeonsi-stoney - checksum: fd367551aa74e2903e0590a893da01a6 + checksum: f9e6c1cb70add69cf2a4724800d48b25 - path: gputest/tessmark.trace expectations: - device: gl-radeonsi-stoney @@ -229,7 +229,7 @@ traces: - path: supertuxkart/supertuxkart-antediluvian-abyss.rdc expectations: - device: gl-radeonsi-stoney - checksum: 17f4039392a65ad23133cb2cac82dba4 + checksum: a2c4c127873f93b7db4ef48ea9fb7689 - path: supertuxkart/supertuxkart-menu.rdc expectations: - device: gl-radeonsi-stoney @@ -237,4 +237,4 @@ traces: - path: supertuxkart/supertuxkart-ravenbridge-mansion.rdc expectations: - device: gl-radeonsi-stoney - checksum: 46f08af5c49d711b41d4082f8a5cf6d6 + checksum: c8f9eae92c67c7d53db4d69a703e3914 diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f2ef598..39c07ce 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -194,7 +194,8 @@ optimizations.extend([ (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'), (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), - (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'), + # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late). + (('~ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma'), (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'), ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))), @@ -2027,6 +2028,7 @@ late_optimizations = [ (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), (('ineg', a), ('isub', 0, a), 'options->lower_negate'), (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'), + (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'), # These are duplicated from the main optimizations table. The late # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 534973b..4b879bf 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -698,6 +698,17 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) if (changed) si_nir_opts(nir, false); + /* Run late optimizations to fuse ffma. */ + bool more_late_algebraic = true; + while (more_late_algebraic) { + more_late_algebraic = false; + NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late); + NIR_PASS_V(nir, nir_opt_constant_folding); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_cse); + } + NIR_PASS_V(nir, nir_lower_bool_to_int32); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);