aco: don't optimize cross-lane instructions across p_wqm

author Daniel Schürmann <daniel@schuermann.dev>

Mon, 4 Sep 2023 13:32:24 +0000 (15:32 +0200)

committer Marge Bot <emma+marge@anholt.net>

Thu, 14 Sep 2023 09:25:22 +0000 (09:25 +0000)
author Daniel Schürmann <daniel@schuermann.dev>
Mon, 4 Sep 2023 13:32:24 +0000 (15:32 +0200)
committer Marge Bot <emma+marge@anholt.net>
Thu, 14 Sep 2023 09:25:22 +0000 (09:25 +0000)
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp

index f7619d3..d73adc4 100644 (file)
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -382,7 +382,7 @@ process_block(vn_ctx& ctx, Block& block)
        }
  
        if (instr->opcode == aco_opcode::p_discard_if ||
-          instr->opcode == aco_opcode::p_demote_to_helper)
+          instr->opcode == aco_opcode::p_demote_to_helper || instr->opcode == aco_opcode::p_wqm)
           ctx.exec_id++;
  
        if (!can_eliminate(instr)) {
diff --git a/src/amd/compiler/tests/test_d3d11_derivs.cpp b/src/amd/compiler/tests/test_d3d11_derivs.cpp

index ee0299e..c0742e0 100644 (file)
--- a/src/amd/compiler/tests/test_d3d11_derivs.cpp
+++ b/src/amd/compiler/tests/test_d3d11_derivs.cpp
@@ -599,11 +599,13 @@ BEGIN_TEST(d3d11_derivs.get_lod)
     //>> v2: %vec = p_create_vector %x, %y
     //>> lv2: %wqm = p_start_linear_vgpr (kill)%vec
     //>> v1: %x0 = v_mov_b32 %x quad_perm:[0,0,0,0] bound_ctrl:1
-   //>> v1: %x1_m_x0 = v_sub_f32 %x, %x0 quad_perm:[1,1,1,1] bound_ctrl:1
-   //>> v1: %x2_m_x0 = v_sub_f32 (kill)%x, (kill)%x0 quad_perm:[2,2,2,2] bound_ctrl:1
+   //>> v1: %x1_m_x0 = v_sub_f32 %x, (kill)%x0 quad_perm:[1,1,1,1] bound_ctrl:1
+   //>> v1: %x1 = v_mov_b32 %x quad_perm:[0,0,0,0] bound_ctrl:1
+   //>> v1: %x2_m_x0 = v_sub_f32 (kill)%x, (kill)%x1 quad_perm:[2,2,2,2] bound_ctrl:1
     //>> v1: %y0 = v_mov_b32 %y quad_perm:[0,0,0,0] bound_ctrl:1
-   //>> v1: %y1_m_y0 = v_sub_f32 %y, %y0 quad_perm:[1,1,1,1] bound_ctrl:1
-   //>> v1: %y2_m_y0 = v_sub_f32 (kill)%y, (kill)%y0 quad_perm:[2,2,2,2] bound_ctrl:1
+   //>> v1: %y1_m_y0 = v_sub_f32 %y, (kill)%y0 quad_perm:[1,1,1,1] bound_ctrl:1
+   //>> v1: %y1 = v_mov_b32 %y quad_perm:[0,0,0,0] bound_ctrl:1
+   //>> v1: %y2_m_y0 = v_sub_f32 (kill)%y, (kill)%y1 quad_perm:[2,2,2,2] bound_ctrl:1
     //>> BB1
     //>> v2: %_ = image_get_lod (kill)%_, (kill)%_, v1: undef, %wqm 2d
     //>> BB2
author	Daniel Schürmann <daniel@schuermann.dev>
	Mon, 4 Sep 2023 13:32:24 +0000 (15:32 +0200)
committer	Marge Bot <emma+marge@anholt.net>
	Thu, 14 Sep 2023 09:25:22 +0000 (09:25 +0000)
src/amd/compiler/aco_opt_value_numbering.cpp		patch | blob | history
src/amd/compiler/tests/test_d3d11_derivs.cpp		patch | blob | history