aco: don't optimize DPP across more than one block

author Georg Lehmann <dadschoorse@gmail.com>

Fri, 24 Nov 2023 14:45:44 +0000 (15:45 +0100)

committer Eric Engestrom <eric@engestrom.ch>

Sun, 3 Dec 2023 07:57:23 +0000 (07:57 +0000)
author Georg Lehmann <dadschoorse@gmail.com>
Fri, 24 Nov 2023 14:45:44 +0000 (15:45 +0100)
committer Eric Engestrom <eric@engestrom.ch>
Sun, 3 Dec 2023 07:57:23 +0000 (07:57 +0000)
diff --git a/.pick_status.json b/.pick_status.json

index c076dd02c3a0b72991c0eb7f96c04f244aff4445..457e73460bf6a1b16712cb6ff9c20bbad287bafb 100644 (file)
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1844,7 +1844,7 @@
          "description": "aco: don't optimize DPP across more than one block",
          "nominated": true,
          "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
          "main_sha": null,
          "because_sha": null,
          "notes": null
diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp

index 48ada196926ee7e21b4c455f6d5705a5ca2c2512..5978e7c4b4623daea49e7077781cf8eba0bbea33 100644 (file)
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -492,6 +492,13 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
        if (!op_instr_idx.found())
           continue;
  
+      /* is_overwritten_since only considers active lanes when the register could possibly
+       * have been overwritten from inactive lanes. Restrict this optimization to at most
+       * one block so that there is no possibility for clobbered inactive lanes.
+       */
+      if (ctx.current_block->index - op_instr_idx.block > 1)
+         continue;
+
        const Instruction* mov = ctx.get(op_instr_idx);
        if (mov->opcode != aco_opcode::v_mov_b32 || !mov->isDPP())
           continue;
diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp

index 811e762399b055d2be5472b7aee8ac653f4933f7..c0cb4fc8a2f5070da3347ca862e38d857ae6099a 100644 (file)
--- a/src/amd/compiler/tests/test_optimizer_postRA.cpp
+++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp
@@ -571,6 +571,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
           //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
           bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
  
+         //! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
+         //! p_unit_test 10, %res10:v[12]
+         Temp result =
+            bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
+         writeout(10, Operand(result, reg_v12));
+
           //! p_logical_end
           //! s2: %0:vcc = p_branch BB3
  
@@ -605,12 +611,6 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
     //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
     //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
  
-   //! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
-   //! p_unit_test 10, %res10:v[12]
-   Temp result =
-      bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
-   writeout(10, Operand(result, reg_v12));
-
     finish_optimizer_postRA_test();
  END_TEST
author	Georg Lehmann <dadschoorse@gmail.com>
	Fri, 24 Nov 2023 14:45:44 +0000 (15:45 +0100)
committer	Eric Engestrom <eric@engestrom.ch>
	Sun, 3 Dec 2023 07:57:23 +0000 (07:57 +0000)
.pick_status.json		patch | blob | history
src/amd/compiler/aco_optimizer_postRA.cpp		patch | blob | history
src/amd/compiler/tests/test_optimizer_postRA.cpp		patch | blob | history