From 61c786bad51c48759ea4d2e8c405e5f99abb69a7 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 13 Nov 2020 19:11:56 -0800 Subject: [PATCH] intel/fs: Constant fold SHL This is a modified version of a commit originally in !7698. This version adds the changes to brw_fs_copy_propagation. If the address passed to fs_visitor::swizzle_nir_scratch_addr is a constant, that function will generate SHL with two constant sources. DG2 uses a different path to generate those addresses, so the constant folding can't occur there yet. That will be addressed in the next commit. What follows is the commit change history from that older MR. v2: Previously this commit was after `intel/fs: Combine constants for integer instructions too`. However, this commit can create invalid instructions that are only cleaned up by `intel/fs: Combine constants for integer instructions too`. That would potentially affect the shader-db results of each commit, but I did not collect new data for the reordering. v3: Fix masking for W/UW and for Q/UQ types. Add an assertion for !saturate. Both suggested by Ken. Also add an assertion that B/UB types don't magically come back. v4: Fix sources count. See also ed3c2f73dbb ("intel/fs: fixup sources number from opt_algebraic"). v5: Fix typo in comment added in v3. Noticed by Marcin. Fix a typo in a comment added when pulling this commit out of !7698. Noticed by Ken. shader-db results: DG2 No changes. Tiger Lake, Ice Lake, and Skylake had similar results (Ice Lake shown) total instructions in shared programs: 20655696 -> 20651648 (-0.02%) instructions in affected programs: 23125 -> 19077 (-17.50%) helped: 7 / HURT: 0 total cycles in shared programs: 858436639 -> 858407749 (<.01%) cycles in affected programs: 8990532 -> 8961642 (-0.32%) helped: 7 / HURT: 0 Broadwell and Haswell had similar results. 
(Broadwell shown) total instructions in shared programs: 18500780 -> 18496630 (-0.02%) instructions in affected programs: 24715 -> 20565 (-16.79%) helped: 7 / HURT: 0 total cycles in shared programs: 946100660 -> 946087688 (<.01%) cycles in affected programs: 5838252 -> 5825280 (-0.22%) helped: 7 / HURT: 0 total spills in shared programs: 17588 -> 17572 (-0.09%) spills in affected programs: 1206 -> 1190 (-1.33%) helped: 2 / HURT: 0 total fills in shared programs: 25192 -> 25156 (-0.14%) fills in affected programs: 156 -> 120 (-23.08%) helped: 2 / HURT: 0 No shader-db changes on any older Intel platforms. fossil-db results: DG2 Totals: Instrs: 197780415 -> 197780372 (-0.00%); split: -0.00%, +0.00% Cycles: 14066412266 -> 14066410782 (-0.00%); split: -0.00%, +0.00% Totals from 16 (0.00% of 668055) affected shaders: Instrs: 16420 -> 16377 (-0.26%); split: -0.43%, +0.17% Cycles: 220133 -> 218649 (-0.67%); split: -0.69%, +0.01% Tiger Lake, Ice Lake and Skylake had similar results. (Ice Lake shown) Totals: Instrs: 153425977 -> 153423678 (-0.00%) Cycles: 14747928947 -> 14747929547 (+0.00%); split: -0.00%, +0.00% Subgroup size: 8535968 -> 8535976 (+0.00%) Send messages: 7697606 -> 7697607 (+0.00%) Scratch Memory Size: 4380672 -> 4381696 (+0.02%) Totals from 6 (0.00% of 662749) affected shaders: Instrs: 13893 -> 11594 (-16.55%) Cycles: 5386074 -> 5386674 (+0.01%); split: -0.42%, +0.43% Subgroup size: 80 -> 88 (+10.00%) Send messages: 675 -> 676 (+0.15%) Scratch Memory Size: 91136 -> 92160 (+1.12%) Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_fs.cpp | 33 ++++++++++++++++++++++++++ src/intel/compiler/brw_fs_copy_propagation.cpp | 12 +++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 020a5cb..3aaac3c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2822,6 +2822,39 @@ fs_visitor::opt_algebraic() progress = true; } break; + case 
BRW_OPCODE_SHL: + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { + /* It's not currently possible to generate this, and this constant + * folding does not handle it. + */ + assert(!inst->saturate); + + fs_reg result; + + switch (type_sz(inst->src[0].type)) { + case 2: + result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f))); + break; + case 4: + result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f)); + break; + case 8: + result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f)); + break; + default: + /* Just in case a future platform re-enables B or UB types. */ + unreachable("Invalid source size."); + } + + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = retype(result, inst->dst.type); + inst->src[1] = reg_undef; + inst->sources = 1; + + progress = true; + } + break; + case SHADER_OPCODE_BROADCAST: if (is_uniform(inst->src[0])) { inst->opcode = BRW_OPCODE_MOV; diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 9b14b25..d16077c 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -912,7 +912,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) FALLTHROUGH; case BRW_OPCODE_BFI1: case BRW_OPCODE_ASR: - case BRW_OPCODE_SHL: case BRW_OPCODE_SHR: case BRW_OPCODE_SUBB: if (i == 1) { @@ -921,6 +920,17 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) } break; + case BRW_OPCODE_SHL: + /* Only constant propagate into src0 if src1 is also constant. In that + * specific case, constant folding will eliminate the instruction. + */ + if ((i == 0 && inst->src[1].file == IMM) || + i == 1) { + inst->src[i] = val; + progress = true; + } + break; + case BRW_OPCODE_MACH: case BRW_OPCODE_MUL: case SHADER_OPCODE_MULH: -- 2.7.4