aco: only emit waitcnt on loop continues if there was some load or export
author: Daniel Schürmann <daniel@schuermann.dev>
Thu, 19 Sep 2019 16:48:01 +0000 (18:48 +0200)
committer: Daniel Schürmann <daniel@schuermann.dev>
Mon, 23 Sep 2019 11:39:33 +0000 (13:39 +0200)
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
src/amd/compiler/aco_insert_waitcnt.cpp

index d19fdad..9bd9f06 100644 (file)
@@ -648,7 +648,7 @@ void handle_block(Program *program, Block& block, wait_ctx& ctx)
    /* check if this block is at the end of a loop */
    for (unsigned succ_idx : block.linear_succs) {
       /* eliminate any remaining counters */
-      if (succ_idx <= block.index && (ctx.vm_cnt || ctx.exp_cnt || ctx.lgkm_cnt || ctx.vs_cnt)) {
+      if (succ_idx <= block.index && (ctx.vm_cnt || ctx.exp_cnt || ctx.lgkm_cnt || ctx.vs_cnt) && !ctx.gpr_map.empty()) {
          // TODO: we could do better if we only wait if the regs between the block and other predecessors differ
 
          aco_ptr<Instruction> branch = std::move(new_instructions.back());