From 9919542f1cfff70524bc6117d19bf88e59159caa Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sat, 14 Jan 2017 23:32:12 -0800 Subject: [PATCH] i965: Make DCE set null destinations on messages with side effects. (Co-authored by Matt Turner.) Image atomics, for example, return a value - but the shader may not want to use it. We assigned a useless VGRF destination. This seemed harmless, but it can actually be quite harmful. The register allocator has to assign that VGRF to a real register. It may assign the same actual GRF to the destination of an instruction that follows soon after. This results in a write-after-write (WAW) dependency, and stall. A number of "Deus Ex: Mankind Divided" shaders use image atomics, but don't use the return value. Several of these were hitting WAW stalls for nearly 14,000 (poorly estimated) cycles a pop. Making dead code elimination null out the destination avoids this issue. This patch cuts one shader's estimated cycles by -98.39%! Removing the message response should also help with data cluster bandwidth. On Skylake: (instruction counts remain identical) total cycles in shared programs: 255413890 -> 248081010 (-2.87%) cycles in affected programs: 12019948 -> 4687068 (-61.01%) helped: 24 HURT: 10 v2: Make can_omit_write independent of can_eliminate (Curro). Signed-off-by: Kenneth Graunke Reviewed-by: Francisco Jerez Reviewed-by: Matt Turner --- .../dri/i965/brw_fs_dead_code_eliminate.cpp | 54 ++++++++++++++++------ 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 0dd6091..7adb427 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -34,6 +34,42 @@ * yet in the tail end of this block. */ +/** + * Is it safe to eliminate the instruction? + */ +static bool +can_eliminate(const fs_inst *inst, BITSET_WORD *flag_live) +{ + return !inst->is_control_flow() && + !inst->has_side_effects() && + !(flag_live[0] & inst->flags_written()) && + !inst->writes_accumulator; +} + +/** + * Is it safe to omit the write, making the destination ARF null? + */ +static bool +can_omit_write(const fs_inst *inst) +{ + switch (inst->opcode) { + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + return true; + default: + /* We can eliminate the destination write for ordinary instructions, + * but not most SENDs. + */ + if (inst->opcode < 128 && inst->mlen == 0) + return true; + + /* It might not be safe for other virtual opcodes. */ + return false; + } +} + bool fs_visitor::dead_code_eliminate() { @@ -52,29 +88,21 @@ fs_visitor::dead_code_eliminate() sizeof(BITSET_WORD)); foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { - if (inst->dst.file == VGRF && !inst->has_side_effects()) { + if (inst->dst.file == VGRF) { const unsigned var = live_intervals->var_from_reg(inst->dst); bool result_live = false; for (unsigned i = 0; i < regs_written(inst); i++) result_live |= BITSET_TEST(live, var + i); - if (!result_live) { + if (!result_live && + (can_omit_write(inst) || can_eliminate(inst, flag_live))) { + inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); progress = true; - - if (inst->writes_accumulator || inst->flags_written()) { - inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type)); - } else { - inst->opcode = BRW_OPCODE_NOP; - } } } - if (inst->dst.is_null() && - !inst->is_control_flow() && - !inst->has_side_effects() && - !(flag_live[0] & inst->flags_written()) && - !inst->writes_accumulator) { + if (inst->dst.is_null() && can_eliminate(inst, flag_live)) { inst->opcode = BRW_OPCODE_NOP; progress = true; } -- 2.7.4