[AMDGPU] Make sure to fix implicit operands on insertBranch
author dstuttar <david.stuttard@amd.com>
Tue, 23 Jun 2020 16:47:58 +0000 (17:47 +0100)
committer dstuttar <david.stuttard@amd.com>
Wed, 24 Jun 2020 15:50:48 +0000 (16:50 +0100)
Summary:
Without fixImplicitOperands, insertBranch may create default implicit operands
for the wrong wave size (e.g. $vcc instead of $vcc_lo in wave32).

Includes a simple test that provokes insertBranch in the right way to expose
the issue being fixed.

Change-Id: I92bdcdee9fcb7b4d91529b84e76a48ac8218483e

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82459

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/insert-branch-w32.mir [new file with mode: 0644]

index a0c25fa..64dfd4a 100644 (file)
@@ -2265,6 +2265,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
 
     // Copy the flags onto the implicit condition register operand.
     preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
+    fixImplicitOperands(*CondBr);
 
     if (BytesAdded)
       *BytesAdded = 4;
@@ -3326,7 +3327,8 @@ static void copyFlagsToImplicitVCC(MachineInstr &MI,
                                    const MachineOperand &Orig) {
 
   for (MachineOperand &Use : MI.implicit_operands()) {
-    if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
+    if (Use.isUse() &&
+        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
       Use.setIsUndef(Orig.isUndef());
       Use.setIsKill(Orig.isKill());
       return;
diff --git a/llvm/test/CodeGen/AMDGPU/insert-branch-w32.mir b/llvm/test/CodeGen/AMDGPU/insert-branch-w32.mir
new file mode 100644 (file)
index 0000000..5ccf84d
--- /dev/null
@@ -0,0 +1,47 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass branch-folder -o - %s | FileCheck %s
+
+# Designed to provoke calling SIInstrInfo::insertBranch in wave32 mode
+# The implicit $vcc operand should be $vcc_lo in this case
+
+...
+# CHECK-LABEL: bb.1:
+# CHECK: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+
+name:            _amdgpu_cs_main
+body:             |
+  bb.0:
+    $vgpr1 = V_MOV_B32_e32 1050, implicit $exec
+    $sgpr0 = S_MOV_B32 1123418112
+    $vcc_hi = IMPLICIT_DEF
+  bb.1:
+    $vgpr0 = COPY killed $vgpr1, implicit $exec
+    V_CMP_GT_U32_e32 5, $vgpr1, implicit-def $vcc_lo, implicit $exec, implicit-def $vcc
+    $vcc_lo = S_AND_B32 $exec_lo, $vcc_lo, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo, implicit $vcc
+    S_BRANCH %bb.2
+  
+  bb.2:
+    $sgpr1 = COPY $sgpr0
+    S_BRANCH %bb.1
+
+...
+
+# CHECK-LABEL: bb.1:
+# CHECK: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc_lo
+---
+name:            _amdgpu_cs_main_undef
+body:             |
+  bb.0:
+    $vgpr1 = V_MOV_B32_e32 1050, implicit $exec
+    $sgpr0 = S_MOV_B32 1123418112
+    $vcc_hi = IMPLICIT_DEF
+  bb.1:
+    $vgpr0 = COPY killed $vgpr1, implicit $exec
+    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc_lo, implicit undef $vcc
+    S_BRANCH %bb.2
+  
+  bb.2:
+    $sgpr1 = COPY $sgpr0
+    S_BRANCH %bb.1
+
+...