From 7873edee6e5c65eb8e9622264c2d7ae9b31cf4e6 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 23 Mar 2023 16:20:38 -0700 Subject: [PATCH] intel/fs: Use specialized version of regions_overlap in opt_copy_propagation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Since one of the registers must always be either VGRF or FIXED_GRF, much of regions_overlap and reg_offset can be elided. On my Ice Lake laptop (using a locked CPU speed and other measures to prevent thermal throttling, etc.) using a debugoptimized build, improves performance of Vulkan CTS "deqp-vk --deqp-case='dEQP-VK.*spir*'" by -0.29% ± 0.097% (n = 5, pooled s = 0.361697). Using a release build, improves performance of compiling shaders from batman_arkham_city_goty.foz by -3.3% ± 0.04% (n = 5, pooled s = 0.178312). Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/brw_fs_copy_propagation.cpp | 42 ++++++++++++++++++++------ 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 0555694..1cd1a2f 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -186,6 +186,28 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, } /** + * Like reg_offset, but register must be VGRF or FIXED_GRF. + */ +static inline unsigned +grf_reg_offset(const fs_reg &r) +{ + return (r.file == VGRF ? 0 : r.nr) * REG_SIZE + + r.offset + + (r.file == FIXED_GRF ? r.subnr : 0); +} + +/** + * Like regions_overlap, but register must be VGRF or FIXED_GRF. 
+ */ +static inline bool +grf_regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds) +{ + return reg_space(r) == reg_space(s) && + !(grf_reg_offset(r) + dr <= grf_reg_offset(s) || + grf_reg_offset(s) + ds <= grf_reg_offset(r)); +} + +/** * Set up initial values for each of the data flow sets, prior to running * the fixed-point algorithm. */ @@ -227,8 +249,8 @@ fs_copy_prop_dataflow::setup_initial_values() unsigned idx = reg_space(inst->dst) & (acp_table_size - 1); foreach_in_list(acp_entry, entry, &acp_table[idx]) { - if (regions_overlap(inst->dst, inst->size_written, - entry->dst, entry->size_written)) { + if (grf_regions_overlap(inst->dst, inst->size_written, + entry->dst, entry->size_written)) { BITSET_SET(bd[block->num].kill, entry->global_idx); if (inst->force_writemask_all && !entry->force_writemask_all) BITSET_SET(bd[block->num].exec_mismatch, entry->global_idx); @@ -257,8 +279,8 @@ fs_copy_prop_dataflow::setup_initial_values() unsigned idx = reg_space(inst->dst) & (acp_table_size - 1); foreach_in_list(acp_entry, entry, &acp_table[idx]) { - if (regions_overlap(inst->dst, inst->size_written, - entry->src, entry->size_read)) { + if (grf_regions_overlap(inst->dst, inst->size_written, + entry->src, entry->size_read)) { BITSET_SET(bd[block->num].kill, entry->global_idx); if (inst->force_writemask_all && !entry->force_writemask_all) BITSET_SET(bd[block->num].exec_mismatch, entry->global_idx); @@ -1075,8 +1097,8 @@ can_propagate_from(fs_inst *inst) return (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == VGRF && ((inst->src[0].file == VGRF && - !regions_overlap(inst->dst, inst->size_written, - inst->src[0], inst->size_read(0))) || + !grf_regions_overlap(inst->dst, inst->size_written, + inst->src[0], inst->size_read(0))) || inst->src[0].file == ATTR || inst->src[0].file == UNIFORM || inst->src[0].file == IMM || @@ -1116,8 +1138,8 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, /* kill the destination from the 
ACP */ if (inst->dst.file == VGRF || inst->dst.file == FIXED_GRF) { foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) { - if (regions_overlap(entry->dst, entry->size_written, - inst->dst, inst->size_written)) + if (grf_regions_overlap(entry->dst, entry->size_written, + inst->dst, inst->size_written)) entry->remove(); } @@ -1129,8 +1151,8 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, /* Make sure we kill the entry if this instruction overwrites * _any_ of the registers that it reads */ - if (regions_overlap(entry->src, entry->size_read, - inst->dst, inst->size_written)) + if (grf_regions_overlap(entry->src, entry->size_read, + inst->dst, inst->size_written)) entry->remove(); } } -- 2.7.4