From bda1d72dd999a819b9645f55c2247bf84292bf34 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 2 Apr 2020 16:20:34 -0700 Subject: [PATCH] intel/fs: Replace fs_visitor::bank_conflict_cycles() with stand-alone function. This will be re-usable by the IR performance analysis pass. Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs.h | 1 - src/intel/compiler/brw_fs_bank_conflicts.cpp | 22 +++++++++------------- src/intel/compiler/brw_ir_fs.h | 3 +++ src/intel/compiler/brw_schedule_instructions.cpp | 8 +++++--- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index f261296..b421723 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -167,7 +167,6 @@ public: bool opt_drop_redundant_mov_to_flags(); bool opt_register_renaming(); bool opt_bank_conflicts(); - unsigned bank_conflict_cycles(const fs_inst *inst) const; bool register_coalesce(); bool compute_to_mrf(); bool eliminate_find_live_channel(); diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index f10caf2..ec19dc6 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -935,20 +935,16 @@ fs_visitor::opt_bank_conflicts() } /** - * Estimate the number of GRF bank conflict cycles incurred by an instruction. + * Return whether the instruction incurs GRF bank conflict cycles. * - * Note that this neglects conflict cycles prior to register allocation - * because we don't know which bank each VGRF is going to end up aligned to. + * Note that this is only accurate after register allocation because otherwise + * we don't know which bank each VGRF is going to end up aligned to. 
*/ -unsigned -fs_visitor::bank_conflict_cycles(const fs_inst *inst) const +bool +has_bank_conflict(const gen_device_info *devinfo, const fs_inst *inst) { - if (grf_used && inst->is_3src(devinfo) && - is_grf(inst->src[1]) && is_grf(inst->src[2]) && - bank_of(reg_of(inst->src[1])) == bank_of(reg_of(inst->src[2])) && - !is_conflict_optimized_out(devinfo, inst)) { - return DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); - } else { - return 0; - } + return inst->is_3src(devinfo) && + is_grf(inst->src[1]) && is_grf(inst->src[2]) && + bank_of(reg_of(inst->src[1])) == bank_of(reg_of(inst->src[2])) && + !is_conflict_optimized_out(devinfo, inst); } diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index f7b389d..6ba3a6c 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -667,4 +667,7 @@ is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst) alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written; } +bool +has_bank_conflict(const gen_device_info *devinfo, const fs_inst *inst); + #endif diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 7c1390b..6edafc8 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -1649,10 +1649,12 @@ vec4_instruction_scheduler::choose_instruction_to_schedule() } int -fs_instruction_scheduler::issue_time(backend_instruction *inst) +fs_instruction_scheduler::issue_time(backend_instruction *inst0) { - const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst); - if (is_compressed((fs_inst *)inst)) + const fs_inst *inst = static_cast<fs_inst *>(inst0); + const unsigned overhead = v->grf_used && has_bank_conflict(v->devinfo, inst) ? + DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE) : 0; + if (is_compressed(inst)) return 4 + overhead; else return 2 + overhead; -- 2.7.4