From: Ian Romanick Date: Fri, 23 Jun 2023 02:03:25 +0000 (-0700) Subject: intel/fs: Constant fold OR and AND X-Git-Tag: upstream/23.3.3~5073 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cb0de0a1d3420ddf9da56b24b5dd09205b8574ea;p=platform%2Fupstream%2Fmesa.git intel/fs: Constant fold OR and AND The path taken in fs_visitor::swizzle_nir_scratch_addr for DG2 generates some AND and OR instructions before the SHL. This commit folds those so the whole calculation becomes a constant (like on older platforms). v2: Fix return type of src_as_uint. Noticed by Marcin. shader-db results: DG2 total instructions in shared programs: 23190475 -> 23179540 (-0.05%) instructions in affected programs: 36026 -> 25091 (-30.35%) helped: 7 / HURT: 0 total cycles in shared programs: 841196807 -> 841142563 (<.01%) cycles in affected programs: 1660670 -> 1606426 (-3.27%) helped: 7 / HURT: 0 No shader-db changes on any older Intel platforms. fossil-db results: DG2 Totals: Instrs: 197780372 -> 197773966 (-0.00%) Cycles: 14066410782 -> 14066399378 (-0.00%); split: -0.00%, +0.00% Subgroup size: 8438104 -> 8438112 (+0.00%) Send messages: 8049445 -> 8049446 (+0.00%) Scratch Memory Size: 14263296 -> 14264320 (+0.01%) Totals from 9 (0.00% of 668055) affected shaders: Instrs: 24547 -> 18141 (-26.10%) Cycles: 1984791 -> 1973387 (-0.57%); split: -0.98%, +0.40% Subgroup size: 88 -> 96 (+9.09%) Send messages: 867 -> 868 (+0.12%) Scratch Memory Size: 69632 -> 70656 (+1.47%) No fossil-db changes on any older Intel platforms. 
Reviewed-by: Kenneth Graunke
Part-of:
---
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 3aaac3c..6e10036 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2568,6 +2568,78 @@ fs_visitor::lower_constant_loads()
    invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
 }
 
+/**
+ * Widen an integer immediate source to 64 bits for constant folding.
+ *
+ * \p src must be an immediate (asserted).  W is sign-extended from the low
+ * 16 bits of src.ud and UW is zero-extended from them; D/UD come from
+ * src.d/src.ud and Q/UQ from src.d64/src.u64.  Any other register type
+ * hits unreachable().
+ */
+static uint64_t
+src_as_uint(const fs_reg &src)
+{
+   assert(src.file == IMM);
+
+   switch (src.type) {
+   case BRW_REGISTER_TYPE_W:
+      return (uint64_t)(int16_t)(src.ud & 0xffff);
+
+   case BRW_REGISTER_TYPE_UW:
+      return (uint64_t)(uint16_t)(src.ud & 0xffff);
+
+   case BRW_REGISTER_TYPE_D:
+      return (uint64_t)src.d;
+
+   case BRW_REGISTER_TYPE_UD:
+      return (uint64_t)src.ud;
+
+   case BRW_REGISTER_TYPE_Q:
+      return src.d64;
+
+   case BRW_REGISTER_TYPE_UQ:
+      return src.u64;
+
+   default:
+      unreachable("Invalid integer type.");
+   }
+}
+
+/**
+ * Build an immediate register of integer type \p type holding \p value.
+ *
+ * The 64-bit \p value is handed to the brw_imm_* constructor that matches
+ * \p type, relying on the implicit conversion to that constructor's
+ * parameter type.  Any other register type hits unreachable().
+ */
+static fs_reg
+brw_imm_for_type(uint64_t value, enum brw_reg_type type)
+{
+   switch (type) {
+   case BRW_REGISTER_TYPE_W:
+      return brw_imm_w(value);
+
+   case BRW_REGISTER_TYPE_UW:
+      return brw_imm_uw(value);
+
+   case BRW_REGISTER_TYPE_D:
+      return brw_imm_d(value);
+
+   case BRW_REGISTER_TYPE_UD:
+      return brw_imm_ud(value);
+
+   case BRW_REGISTER_TYPE_Q:
+      /* Must be brw_imm_q; brw_imm_d is the constructor for the D case. */
+      return brw_imm_q(value);
+
+   case BRW_REGISTER_TYPE_UQ:
+      return brw_imm_uq(value);
+
+   default:
+      unreachable("Invalid integer type.");
+   }
+}
+
 bool
 fs_visitor::opt_algebraic()
 {
@@ -2701,7 +2773,35 @@ fs_visitor::opt_algebraic()
             break;
          }
          break;
+
+      case BRW_OPCODE_AND:
+         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
+            const uint64_t src0 = src_as_uint(inst->src[0]);
+            const uint64_t src1 = src_as_uint(inst->src[1]);
+
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->sources = 1;
+            inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
+
+         break;
+
       case BRW_OPCODE_OR:
+         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
+            const uint64_t src0 = src_as_uint(inst->src[0]);
+            const uint64_t src1 = src_as_uint(inst->src[1]);
+
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->sources = 1;
+            inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
+            inst->src[1] = reg_undef;
+            progress = true;
+            break;
+         }
+
          if (inst->src[0].equals(inst->src[1]) ||
              inst->src[1].is_zero()) {
             /* On Gfx8+, the OR instruction can have a source modifier that
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index d16077c..ba0ebc5 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -935,8 +935,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
       case BRW_OPCODE_MUL:
       case SHADER_OPCODE_MULH:
       case BRW_OPCODE_ADD:
-      case BRW_OPCODE_OR:
-      case BRW_OPCODE_AND:
       case BRW_OPCODE_XOR:
       case BRW_OPCODE_ADDC:
          if (i == 1) {
@@ -1072,6 +1070,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
          }
          break;
 
+      case BRW_OPCODE_AND:
+      case BRW_OPCODE_OR:
       case SHADER_OPCODE_TEX_LOGICAL:
       case SHADER_OPCODE_TXD_LOGICAL:
       case SHADER_OPCODE_TXF_LOGICAL:
@@ -1120,6 +1120,17 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
       }
    }
 
+   /* If only one of the sources of a 2-source, commutative instruction (e.g.,
+    * AND) is immediate, it must be src1. If both are immediate, opt_algebraic
+    * should fold it away.
+    */
+   if (progress && inst->sources == 2 && inst->is_commutative() &&
+       inst->src[0].file == IMM && inst->src[1].file != IMM) {
+      const auto src1 = inst->src[1];
+      inst->src[1] = inst->src[0];
+      inst->src[0] = src1;
+   }
+
    return progress;
 }
 