From 45c7ff95fc7de2dd631aa79093f92de3d19e4e26 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 5 Mar 2019 12:08:29 -0800 Subject: [PATCH] intel/compiler: Repeat nir_opt_algebraic_late MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit A tiny bit of help seems to come from nir_copy_prop. Future patches will benefit from this change. Doing more copy propagation on the vec4 backend led to a disaster in hurt cycles. v2: Fix typo in comment. Noticed by Matt. All Gen8+ platforms had similar results. (Ice Lake shown) total instructions in shared programs: 17224634 -> 17224623 (<.01%) instructions in affected programs: 4586 -> 4575 (-0.24%) helped: 11 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 0.19% max: 0.53% x̄: 0.27% x̃: 0.23% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -0.36% -0.19% Instructions are helped. total cycles in shared programs: 360828542 -> 360828714 (<.01%) cycles in affected programs: 151159 -> 151331 (0.11%) helped: 49 HURT: 28 helped stats (abs) min: 1 max: 254 x̄: 26.41 x̃: 6 helped stats (rel) min: 0.06% max: 12.02% x̄: 1.34% x̃: 0.42% HURT stats (abs) min: 1 max: 196 x̄: 52.36 x̃: 15 HURT stats (rel) min: 0.05% max: 10.74% x̄: 2.55% x̃: 0.88% 95% mean confidence interval for cycles value: -13.48 17.95 95% mean confidence interval for cycles %-change: -0.69% 0.84% Inconclusive result (value mean confidence interval includes 0). Haswell, Ivy Bridge, and Sandy Bridge had similar results. (Haswell shown) total instructions in shared programs: 13529544 -> 13529542 (<.01%) instructions in affected programs: 358 -> 356 (-0.56%) helped: 2 HURT: 0 total cycles in shared programs: 357290311 -> 357289678 (<.01%) cycles in affected programs: 178324 -> 177691 (-0.35%) helped: 48 HURT: 40 helped stats (abs) min: 1 max: 201 x̄: 31.52 x̃: 13 helped stats (rel) min: 0.06% max: 10.92% x̄: 1.71% x̃: 0.66% HURT stats (abs) min: 1 max: 224 x̄: 22.00 x̃: 6 HURT stats (rel) min: 0.05% max: 15.84% x̄: 1.29% x̃: 0.31% 95% mean confidence interval for cycles value: -18.28 3.89 95% mean confidence interval for cycles %-change: -1.01% 0.32% Inconclusive result (value mean confidence interval includes 0). Iron Lake and GM45 had similar results. (Iron Lake shown) total instructions in shared programs: 8159110 -> 8158980 (<.01%) instructions in affected programs: 22719 -> 22589 (-0.57%) helped: 65 HURT: 0 helped stats (abs) min: 1 max: 3 x̄: 2.00 x̃: 2 helped stats (rel) min: 0.07% max: 1.05% x̄: 0.73% x̃: 0.74% 95% mean confidence interval for instructions value: -2.06 -1.94 95% mean confidence interval for instructions %-change: -0.78% -0.68% Instructions are helped. total cycles in shared programs: 188609448 -> 188609214 (<.01%) cycles in affected programs: 1875852 -> 1875618 (-0.01%) helped: 109 HURT: 104 helped stats (abs) min: 2 max: 46 x̄: 5.30 x̃: 4 helped stats (rel) min: 0.02% max: 0.90% x̄: 0.09% x̃: 0.07% HURT stats (abs) min: 2 max: 20 x̄: 3.31 x̃: 2 HURT stats (rel) min: 0.01% max: 0.26% x̄: 0.04% x̃: 0.02% 95% mean confidence interval for cycles value: -1.95 -0.25 95% mean confidence interval for cycles %-change: -0.04% -0.01% Cycles are helped. Reviewed-by: Matt Turner --- src/compiler/nir/nir_opt_algebraic.py | 4 ++++ src/intel/compiler/brw_nir.c | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 00750ff..b984df0 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1118,6 +1118,10 @@ late_optimizations = [ (('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), (('~fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), + # nir_lower_to_source_mods will collapse this, but its existence during the + # optimization loop can prevent other optimizations. + (('fneg', ('fneg', a)), a), + (('~fge', ('fmin(is_used_once)', ('fadd(is_used_once)', a, b), ('fadd', c, d)), 0.0), ('iand', ('fge', a, ('fneg', b)), ('fge', c, ('fneg', d)))), (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'), diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index a057f28..9a4afb4 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -912,7 +912,22 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, compiler->devinfo->gen >= 6); } - OPT(nir_opt_algebraic_late); + do { + progress = false; + if (OPT(nir_opt_algebraic_late)) { + /* At this late stage, anything that makes more constants will wreak + * havok on the vec4 backend. The handling of constants in the vec4 + * backend is not good. + */ + if (is_scalar) { + OPT(nir_opt_constant_folding); + OPT(nir_copy_prop); + } + OPT(nir_opt_dce); + OPT(nir_opt_cse); + } + } while (progress); + OPT(brw_nir_lower_conversions); -- 2.7.4