aco: don't move exec reads around exec writes
author: Rhys Perry <pendingchaos02@gmail.com>
Fri, 11 Feb 2022 19:19:45 +0000 (19:19 +0000)
committer: Marge Bot <emma+marge@anholt.net>
Wed, 26 Apr 2023 13:16:00 +0000 (13:16 +0000)
Fixes flickering and blocky plants in Jedi: Fallen Order.

Also fixes flickering squares in The Last of Us Part 1.

fossil-db (navi21):
Totals from 92 (0.07% of 135636) affected shaders:
Instrs: 35324 -> 35354 (+0.08%); split: -0.03%, +0.11%
CodeSize: 189568 -> 189668 (+0.05%); split: -0.03%, +0.08%
Latency: 345305 -> 346529 (+0.35%); split: -0.02%, +0.37%
InvThroughput: 78632 -> 78625 (-0.01%)
SClause: 1955 -> 1972 (+0.87%); split: -0.61%, +1.48%
Copies: 1311 -> 1304 (-0.53%); split: -0.69%, +0.15%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8883
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8878
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22696>

src/amd/compiler/aco_scheduler.cpp

index 94429b3..690773a 100644 (file)
@@ -459,6 +459,7 @@ struct hazard_query {
    bool contains_spill;
    bool contains_sendmsg;
    bool uses_exec;
+   bool writes_exec;
    memory_event_set mem_events;
    unsigned aliasing_storage;      /* storage classes which are accessed (non-SMEM) */
    unsigned aliasing_storage_smem; /* storage classes which are accessed (SMEM) */
@@ -471,6 +472,7 @@ init_hazard_query(const sched_ctx& ctx, hazard_query* query)
    query->contains_spill = false;
    query->contains_sendmsg = false;
    query->uses_exec = false;
+   query->writes_exec = false;
    memset(&query->mem_events, 0, sizeof(query->mem_events));
    query->aliasing_storage = 0;
    query->aliasing_storage_smem = 0;
@@ -515,6 +517,10 @@ add_to_hazard_query(hazard_query* query, Instruction* instr)
       query->contains_spill = true;
    query->contains_sendmsg |= instr->opcode == aco_opcode::s_sendmsg;
    query->uses_exec |= needs_exec_mask(instr);
+   for (const Definition& def : instr->definitions) {
+      if (def.isFixed() && def.physReg() == exec)
+         query->writes_exec = true;
+   }
 
    memory_sync_info sync = get_sync_info_with_hack(instr);
 
@@ -560,6 +566,8 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
             return hazard_fail_exec;
       }
    }
+   if (query->writes_exec && needs_exec_mask(instr))
+      return hazard_fail_exec;
 
    /* don't move exports so that they stay closer together */
    if (instr->isEXP())