From d099e47de048723e6296626b04c06612cf828ad5 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin
Date: Tue, 25 Jul 2023 10:09:18 +0300
Subject: [PATCH] intel/fs: add more UNDEFs around SEND messages

lower_find_live_channel() in particular is used a lot in control flow
to find the live channel for the surface/sampler handle. Adding UNDEFs
on the temporary registers used for finding the live channels helps
reduce the liveness of those temporary registers, especially in loops.

Some titles affected:

Rise Of The Tomb Raider:

  Totals from 2780 (22.58% of 12311) affected shaders:
  Instrs: 1294455 -> 1294592 (+0.01%); split: -0.15%, +0.16%
  Cycles: 1473136441 -> 1471302617 (-0.12%); split: -1.52%, +1.40%
  Max live registers: 144282 -> 143595 (-0.48%)
  Max dispatch width: 22200 -> 22232 (+0.14%)

Red Dead Redemption 2:

  Totals from 435 (7.28% of 5972) affected shaders:
  Instrs: 488472 -> 487594 (-0.18%); split: -0.31%, +0.14%
  Cycles: 11354732 -> 11384928 (+0.27%); split: -0.44%, +0.71%
  Spill count: 1217 -> 1172 (-3.70%)
  Fill count: 3521 -> 3447 (-2.10%)
  Scratch Memory Size: 64512 -> 62464 (-3.17%)
  Max live registers: 35997 -> 35798 (-0.55%)

Fallout 4:

  Totals from 8 (0.49% of 1638) affected shaders:
  Instrs: 41908 -> 40509 (-3.34%)
  Cycles: 3638464 -> 3555680 (-2.28%); split: -2.67%, +0.39%
  Spill count: 717 -> 665 (-7.25%)
  Fill count: 2542 -> 2438 (-4.09%)
  Scratch Memory Size: 32768 -> 16384 (-50.00%)
  Max live registers: 567 -> 534 (-5.82%)

Cyberpunk 2077:

  Totals from 2984 (28.97% of 10301) affected shaders:
  Instrs: 3888874 -> 3891600 (+0.07%); split: -0.20%, +0.27%
  Cycles: 67906489 -> 67767721 (-0.20%); split: -0.68%, +0.47%
  Spill count: 200 -> 98 (-51.00%)
  Fill count: 237 -> 90 (-62.03%)
  Scratch Memory Size: 10240 -> 8192 (-20.00%)
  Max live registers: 215715 -> 212727 (-1.39%)

Signed-off-by: Lionel Landwerlin
Reviewed-by: Kenneth Graunke
Reviewed-by: Francisco Jerez
Part-of:
---
 src/intel/compiler/brw_fs.cpp                  | 2 ++
 src/intel/compiler/brw_lower_logical_sends.cpp | 1 +
 2 files changed, 3 insertions(+)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index b8217b7..6a8bd03 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5826,6 +5826,7 @@ fs_visitor::lower_find_live_channel()
        */
       if (!(first && packed_dispatch)) {
          fs_reg mask = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+         ubld.UNDEF(mask);
          ubld.emit(SHADER_OPCODE_READ_SR_REG, mask, brw_imm_ud(vmask ? 3 : 2));
 
          /* Quarter control has the effect of magically shifting the value of
@@ -5843,6 +5844,7 @@ fs_visitor::lower_find_live_channel()
          ubld.FBL(inst->dst, exec_mask);
       } else {
          fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+         ubld.UNDEF(tmp);
          ubld.LZD(tmp, exec_mask);
          ubld.ADD(inst->dst, negate(tmp), brw_imm_uw(31));
       }
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index d4cfa9d..d86a902 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -1344,6 +1344,7 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
 
    const fs_visitor *v = static_cast<const fs_visitor *>(bld.shader);
    const fs_reg vector_mask = ubld.vgrf(BRW_REGISTER_TYPE_UW);
+   ubld.UNDEF(vector_mask);
    ubld.emit(SHADER_OPCODE_READ_SR_REG, vector_mask, brw_imm_ud(3));
    const unsigned subreg = sample_mask_flag_subreg(v);
 
-- 
2.7.4