[AMDGPU] WQM: Ensure exact mode placement before branches
author Carl Ritson <carl.ritson@amd.com>
Tue, 6 Jun 2023 08:25:22 +0000 (17:25 +0900)
committer Carl Ritson <carl.ritson@amd.com>
Tue, 6 Jun 2023 09:11:35 +0000 (18:11 +0900)
Fix for D151797 where the change accidentally allowed exit to
exact mode between branch instructions.

Reviewed By: dstuttard

Differential Revision: https://reviews.llvm.org/D152228

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/test/CodeGen/AMDGPU/wqm-terminators.mir

index a0500cbd4cd39fd4d4c7eb16c0f81a88a2186a59..c93b2382e2f3ca22a2fccc97fb08a72005c51b19 100644 (file)
@@ -1373,6 +1373,10 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
         Needs = StateExact | StateWQM | StateStrict;
       }
 
+      // Exact mode exit can occur in terminators, but must be before branches.
+      if (MI.isBranch() && OutNeeds == StateExact)
+        Needs = StateExact;
+
       ++Next;
     } else {
       // End of basic block
index 344c9997860db8f12b28eea8a66ba37e2b131996..059fdca13fae29ae997c544f73a30eba0b96568c 100644 (file)
@@ -36,9 +36,11 @@ body: |
   ; CHECK-NEXT:   S_CMP_EQ_U32 [[COPY1]], 0, implicit-def $scc
   ; CHECK-NEXT:   undef %5.sub0:vreg_64 = V_MUL_F32_e64 0, [[COPY2]].sub0, 0, [[COPY2]].sub1, 0, 0, implicit $mode, implicit $exec
   ; CHECK-NEXT:   %5.sub1:vreg_64 = V_MUL_F32_e64 0, [[COPY2]].sub0, 0, [[COPY2]].sub1, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+  ; CHECK-NEXT:   $exec_lo = S_AND_B32 $exec_lo, [[COPY]], implicit-def $scc
+  ; CHECK-NEXT:   $scc = COPY [[COPY3]]
   ; CHECK-NEXT:   [[IMAGE_SAMPLE_V3_V2_gfx10_:%[0-9]+]]:vreg_96 = IMAGE_SAMPLE_V3_V2_gfx10 %5, [[DEF]], [[DEF1]], 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
   ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
-  ; CHECK-NEXT:   $exec_lo = S_AND_B32_term $exec_lo, [[COPY]], implicit-def $scc
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1: