From 6689fa2ab4eae15fbd73bba250f42b3fe3b50a3f Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Tue, 21 Jun 2022 16:47:31 -0700
Subject: [PATCH] nir/range_analysis: Teach range analysis about fdot opcodes
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This really, really helps on platforms where fabs() isn't free. A great
many shaders use a * frsq(fabs(fdot(a, a))) to normalize a vector. Since
the result of the fdot must be non-negative, the fabs can be eliminated
by an existing algebraic rule.

shader-db results:

r300 (run on R420 - X800XL)
total instructions in shared programs: 1369807 -> 1368550 (-0.09%)
instructions in affected programs: 59986 -> 58729 (-2.10%)
helped: 609
HURT: 0
total vinst in shared programs: 512899 -> 512861 (<.01%)
vinst in affected programs: 1522 -> 1484 (-2.50%)
helped: 36
HURT: 0
total sinst in shared programs: 260690 -> 260570 (-0.05%)
sinst in affected programs: 1419 -> 1299 (-8.46%)
helped: 120
HURT: 0
total consts in shared programs: 957295 -> 957230 (<.01%)
consts in affected programs: 849 -> 784 (-7.66%)
helped: 65
HURT: 0

LOST: 0
GAINED: 3

The 3 gained shaders are all vertex shaders from XCom: Enemy Unknown.
I'm guessing that game is never going to run on my X800XL. :)

i915
total instructions in shared programs: 791121 -> 780843 (-1.30%)
instructions in affected programs: 220170 -> 209892 (-4.67%)
helped: 2085
HURT: 0
total temps in shared programs: 47765 -> 47766 (<.01%)
temps in affected programs: 9 -> 10 (11.11%)
helped: 0
HURT: 1
total const in shared programs: 93048 -> 92983 (-0.07%)
const in affected programs: 784 -> 719 (-8.29%)
helped: 65
HURT: 0

LOST: 0
GAINED: 36

Haswell, Ivy Bridge, and Sandy Bridge had similar results.
(Haswell shown)
total instructions in shared programs: 16702250 -> 16697908 (-0.03%)
instructions in affected programs: 119277 -> 114935 (-3.64%)
helped: 1065
HURT: 0
helped stats (abs) min: 1 max: 20 x̄: 4.08 x̃: 4
helped stats (rel) min: 0.48% max: 10.17% x̄: 3.66% x̃: 3.94%
95% mean confidence interval for instructions value: -4.26 -3.89
95% mean confidence interval for instructions %-change: -3.76% -3.56%
Instructions are helped.

total cycles in shared programs: 880772068 -> 880734134 (<.01%)
cycles in affected programs: 2134456 -> 2096522 (-1.78%)
helped: 941
HURT: 324
helped stats (abs) min: 2 max: 2180 x̄: 123.06 x̃: 44
helped stats (rel) min: 0.04% max: 49.96% x̄: 7.08% x̃: 3.81%
HURT stats (abs) min: 2 max: 2098 x̄: 240.33 x̃: 35
HURT stats (rel) min: 0.04% max: 77.07% x̄: 12.34% x̃: 3.00%
95% mean confidence interval for cycles value: -47.93 -12.04
95% mean confidence interval for cycles %-change: -2.87% -1.34%
Cycles are helped.

No shader-db changes on any other Intel platform.
Reviewed-by: Jason Ekstrand
Reviewed-by: Emma Anholt
Part-of:
---
 src/compiler/nir/nir_range_analysis.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 46a7dc8..700159b 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1046,6 +1046,37 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
       r = (struct ssa_result_range){le_zero, false, true, false};
       break;
 
+   case nir_op_fdot2:
+   case nir_op_fdot3:
+   case nir_op_fdot4:
+   case nir_op_fdot8:
+   case nir_op_fdot16:
+   case nir_op_fdot2_replicated:
+   case nir_op_fdot3_replicated:
+   case nir_op_fdot4_replicated:
+   case nir_op_fdot8_replicated:
+   case nir_op_fdot16_replicated: {
+      const struct ssa_result_range left =
+         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
+
+      /* If the two sources are the same SSA value, then the result is either
+       * NaN or some number >= 0.  If one source is the negation of the other,
+       * the result is either NaN or some number <= 0.
+       *
+       * In either of these two cases, if one source is a number, then the
+       * other must also be a number.  Since it should not be possible to get
+       * Inf-Inf in the dot-product, the result must also be a number.
+       */
+      if (nir_alu_srcs_equal(alu, alu, 0, 1)) {
+         r = (struct ssa_result_range){ge_zero, false, left.is_a_number, false };
+      } else if (nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
+         r = (struct ssa_result_range){le_zero, false, left.is_a_number, false };
+      } else {
+         r = (struct ssa_result_range){unknown, false, false, false};
+      }
+      break;
+   }
+
    case nir_op_fpow: {
       /* Due to flush-to-zero semanatics of floating-point numbers with very
        * small mangnitudes, we can never really be sure a result will be
-- 
2.7.4