From 4913f54a1f3f1a513f3c44d8e88fb8e66810556b Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 3 May 2023 01:49:56 -0700 Subject: [PATCH] intel/compiler: UNDEF comparisons with smaller than 32-bit Comparisons which produce 32-bit boolean results (0 or 0xFFFFFFFF) but operate on 16-bit types would first generate a CMP instruction with W or HF types, before expanding it out. This CMP is a partial write, which leads us to think the register may contain some prior contents still. When placed in a loop, this causes its live range to extend beyond its real lifetime. Mark the register with UNDEF first so that we know that no prior contents exist and need to be preserved. This affects: flt32, fge32, feq32, fneu32, ilt32, ult32, ige32, uge32, ieq32, ine32 On one of Cyberpunk 2077's most complex compute shaders, this reduces the maximum live registers from 696 to 537 (22.8%). Together with the next patch, Cyberpunk's spills and fills are cut by 10.23% and 9.19%, respectively. Reviewed-by: Lionel Landwerlin Reviewed-by: Francisco Jerez Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 939245c..dd7205b 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1370,8 +1370,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, fs_reg dest = result; const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); - if (bit_size != 32) + if (bit_size != 32) { dest = bld.vgrf(op[0].type, 1); + bld.UNDEF(dest); + } bld.CMP(dest, op[0], op[1], brw_cmod_for_nir_comparison(instr->op)); @@ -1398,8 +1400,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, fs_reg dest = result; const uint32_t bit_size = type_sz(op[0].type) * 8; - if (bit_size != 32) + if (bit_size != 32) { dest = bld.vgrf(op[0].type, 1); + bld.UNDEF(dest); + } bld.CMP(dest, op[0], op[1], 
brw_cmod_for_nir_comparison(instr->op)); -- 2.7.4