AMDGPU/GlobalISel: Fix masked control flow with fallthrough blocks

author Matt Arsenault <Matthew.Arsenault@amd.com>

Sun, 17 May 2020 14:51:22 +0000 (10:51 -0400)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 22 May 2020 14:31:44 +0000 (10:31 -0400)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Sun, 17 May 2020 14:51:22 +0000 (10:51 -0400)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 22 May 2020 14:31:44 +0000 (10:31 -0400)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

index 554075a..63106df 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2278,22 +2278,30 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
  // Return the use branch instruction, otherwise null if the usage is invalid.
  static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
-                                       MachineInstr *&Br) {
+                                       MachineInstr *&Br,
+                                       MachineBasicBlock *&UncondBrTarget) {
    Register CondDef = MI.getOperand(0).getReg();
    if (!MRI.hasOneNonDBGUse(CondDef))
      return nullptr;
  
+  MachineBasicBlock *Parent = MI.getParent();
    MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
-  if (UseMI.getParent() != MI.getParent() ||
+  if (UseMI.getParent() != Parent ||
        UseMI.getOpcode() != AMDGPU::G_BRCOND)
      return nullptr;
  
-  // Make sure the cond br is followed by a G_BR
+  // Make sure the cond br is followed by a G_BR, or is the last instruction.
    MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
-  if (Next != MI.getParent()->end()) {
+  if (Next == Parent->end()) {
+    MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
+    if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
+      return nullptr;
+    UncondBrTarget = &*NextMBB;
+  } else {
      if (Next->getOpcode() != AMDGPU::G_BR)
        return nullptr;
      Br = &*Next;
+    UncondBrTarget = Br->getOperand(0).getMBB();
    }
  
    return &UseMI;
@@ -4110,7 +4118,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
    case Intrinsic::amdgcn_if:
    case Intrinsic::amdgcn_else: {
      MachineInstr *Br = nullptr;
-    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
+    MachineBasicBlock *UncondBrTarget = nullptr;
+    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
        const SIRegisterInfo *TRI
          = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
  
@@ -4118,25 +4127,28 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
        Register Def = MI.getOperand(1).getReg();
        Register Use = MI.getOperand(3).getReg();
  
-      MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
-      if (Br)
-        BrTarget = Br->getOperand(0).getMBB();
-
+      MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
        if (IntrID == Intrinsic::amdgcn_if) {
          B.buildInstr(AMDGPU::SI_IF)
            .addDef(Def)
            .addUse(Use)
-          .addMBB(BrTarget);
+          .addMBB(UncondBrTarget);
        } else {
          B.buildInstr(AMDGPU::SI_ELSE)
            .addDef(Def)
            .addUse(Use)
-          .addMBB(BrTarget)
+          .addMBB(UncondBrTarget)
            .addImm(0);
        }
  
-      if (Br)
-        Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
+      if (Br) {
+        Br->getOperand(0).setMBB(CondBrTarget);
+      } else {
+        // The IRTranslator skips inserting the G_BR for fallthrough cases, but
+        // since we're swapping branch targets it needs to be reinserted.
+        // FIXME: IRTranslator should probably not omit the fallthrough G_BR.
+        B.buildBr(*CondBrTarget);
+      }
  
        MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
        MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
@@ -4149,23 +4161,23 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
    }
    case Intrinsic::amdgcn_loop: {
      MachineInstr *Br = nullptr;
-    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
+    MachineBasicBlock *UncondBrTarget = nullptr;
+    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
        const SIRegisterInfo *TRI
          = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
  
        B.setInstr(*BrCond);
  
-      MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
-      if (Br)
-        BrTarget = Br->getOperand(0).getMBB();
-
+      MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
        Register Reg = MI.getOperand(2).getReg();
        B.buildInstr(AMDGPU::SI_LOOP)
          .addUse(Reg)
-        .addMBB(BrTarget);
+        .addMBB(UncondBrTarget);
  
        if (Br)
-        Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
+        Br->getOperand(0).setMBB(CondBrTarget);
+      else
+        B.buildBr(*CondBrTarget);
  
        MI.eraseFromParent();
        BrCond->eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

index e3197e5..3ddf4ae 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4714,6 +4714,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
    } else {
      // Get the target from BR if we don't negate the condition
      BR = findUser(BRCOND, ISD::BR);
+    assert(BR && "brcond missing unconditional branch user");
      Target = BR->getOperand(1);
    }
  
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll

index 0be830b..bd313de 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -37,7 +37,7 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
  ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; CHECK-NEXT:    ; implicit-def: $vgpr0
  ; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT:    s_cbranch_execnz BB1_2
+; CHECK-NEXT:    s_cbranch_execz BB1_2
  ; CHECK-NEXT:  ; %bb.1: ; %if.true
  ; CHECK-NEXT:    global_load_dword v0, v[0:1], off
  ; CHECK-NEXT:  BB1_2: ; %endif
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir

new file mode 100644 (file)

index 0000000..9716bb3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir
@@ -0,0 +1,21 @@
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
+
+# Make sure there's no crash if there is somehow no successor block.
+
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
+
+---
+name: brcond_si_if_no_succ_block
+body:             |
+  bb.0:
+    S_NOP 0
+
+  bb.1:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+    G_BRCOND %3, %bb.1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir

index 7a32dad..52d44d2 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir
@@ -108,6 +108,7 @@ body:             |
    ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; WAVE64:   [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
    ; WAVE64:   [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
    ; WAVE64: bb.1:
    ; WAVE32-LABEL: name: brcond_si_if
    ; WAVE32: bb.0:
@@ -116,6 +117,7 @@ body:             |
    ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; WAVE32:   [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
    ; WAVE32:   [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
    ; WAVE32: bb.1:
    bb.0:
      successors: %bb.1
@@ -139,6 +141,7 @@ body:             |
    ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; WAVE64:   [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
    ; WAVE64:   [[SI_ELSE:%[0-9]+]]:sreg_64_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
    ; WAVE64: bb.1:
    ; WAVE32-LABEL: name: brcond_si_else
    ; WAVE32: bb.0:
@@ -147,6 +150,7 @@ body:             |
    ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; WAVE32:   [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
    ; WAVE32:   [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_ELSE [[ICMP]](s1), %bb.1, 0, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
    ; WAVE32: bb.1:
    bb.0:
      successors: %bb.1
@@ -161,32 +165,148 @@ body:             |
  ...
  
  ---
-name: brcond_si_loop
+name: brcond_si_loop_brcond
+tracksRegLiveness: true
  body:             |
-  ; WAVE64-LABEL: name: brcond_si_loop
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond
    ; WAVE64: bb.0:
    ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
    ; WAVE64:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; WAVE64:   [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
-  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
    ; WAVE64: bb.1:
-  ; WAVE32-LABEL: name: brcond_si_loop
+  ; WAVE64:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.2
+  ; WAVE64: bb.2:
+  ; WAVE64:   S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond
    ; WAVE32: bb.0:
    ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
    ; WAVE32:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; WAVE32:   [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32:   S_NOP 0
    ; WAVE32:   SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.2
+  ; WAVE32: bb.2:
+  ; WAVE32:   S_NOP 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    G_BRCOND %3, %bb.2
+    G_BR %bb.1
+
+  bb.2:
+    S_NOP 0
+...
+
+# This usage is backwards from how the intrinsic is supposed to be
+# used.
+---
+name: brcond_si_loop_brcond_back
+tracksRegLiveness: true
+body:             |
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond_back
+  ; WAVE64: bb.0:
+  ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64:   [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
+  ; WAVE64: bb.2:
+  ; WAVE64:   S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond_back
+  ; WAVE32: bb.0:
+  ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32:   [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
    ; WAVE32: bb.1:
+  ; WAVE32:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32:   S_NOP 0
+  ; WAVE32:   SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
+  ; WAVE32: bb.2:
+  ; WAVE32:   S_NOP 0
    bb.0:
-    successors: %bb.1
      liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
      %0:_(s32) = COPY $vgpr0
      %1:_(s32) = COPY $vgpr1
      %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
      %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
      G_BRCOND %3, %bb.1
+    G_BR %bb.2
+
+  bb.2:
+    S_NOP 0
+...
+
+# This usage is backwards from how the intrinsic is supposed to be
+# used.
+---
+name: brcond_si_loop_brcond_back_fallthrough
+tracksRegLiveness: true
+body:             |
+  ; WAVE64-LABEL: name: brcond_si_loop_brcond_back_fallthrough
+  ; WAVE64: bb.0:
+  ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64:   [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
+  ; WAVE64: bb.2:
+  ; WAVE32-LABEL: name: brcond_si_loop_brcond_back_fallthrough
+  ; WAVE32: bb.0:
+  ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32:   [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32:   S_NOP 0
+  ; WAVE32:   SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
+  ; WAVE32: bb.2:
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
  
    bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    G_BRCOND %3, %bb.1
+
+  bb.2:
  ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll

index 19471de..d2e06fb 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -164,7 +164,7 @@ define void @localize_internal_globals(i1 %cond) {
  ; GFX9-NEXT:    s_xor_b64 s[4:5], vcc, s[4:5]
  ; GFX9-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
-; GFX9-NEXT:    s_cbranch_execnz BB2_2
+; GFX9-NEXT:    s_cbranch_execz BB2_2
  ; GFX9-NEXT:  ; %bb.1: ; %bb1
  ; GFX9-NEXT:    s_getpc_b64 s[6:7]
  ; GFX9-NEXT:    s_add_u32 s6, s6, static.gv2@rel32@lo+4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll

index 5e619cb..656aa86 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -16,7 +16,7 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
  ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
  ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:    s_cbranch_execz BB0_2
  ; CHECK-NEXT:  ; %bb.1:
  ; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
  ; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
@@ -646,7 +646,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
  ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
  ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_2
+; CGP-NEXT:    s_cbranch_execz BB2_2
  ; CGP-NEXT:  ; %bb.1:
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
@@ -814,7 +814,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
  ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
  ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_6
+; CGP-NEXT:    s_cbranch_execz BB2_6
  ; CGP-NEXT:  ; %bb.5:
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
@@ -2318,7 +2318,7 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
  ; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
  ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB7_2
+; CHECK-NEXT:    s_cbranch_execz BB7_2
  ; CHECK-NEXT:  ; %bb.1:
  ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
  ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
@@ -2767,7 +2767,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
  ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
  ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_2
+; CGP-NEXT:    s_cbranch_execz BB8_2
  ; CGP-NEXT:  ; %bb.1:
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
@@ -2935,7 +2935,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
  ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
  ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_6
+; CGP-NEXT:    s_cbranch_execz BB8_6
  ; CGP-NEXT:  ; %bb.5:
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

index 4253067..928c592 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -16,7 +16,7 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
  ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
  ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:    s_cbranch_execz BB0_2
  ; CHECK-NEXT:  ; %bb.1:
  ; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
  ; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
@@ -642,7 +642,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
  ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
  ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_2
+; CGP-NEXT:    s_cbranch_execz BB2_2
  ; CGP-NEXT:  ; %bb.1:
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v4
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v5
@@ -809,7 +809,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
  ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
  ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB2_6
+; CGP-NEXT:    s_cbranch_execz BB2_6
  ; CGP-NEXT:  ; %bb.5:
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v6
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v7
@@ -2292,7 +2292,7 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
  ; CHECK-NEXT:    ; implicit-def: $vgpr2_vgpr3
  ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execnz BB7_2
+; CHECK-NEXT:    s_cbranch_execz BB7_2
  ; CHECK-NEXT:  ; %bb.1:
  ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, v4
  ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v5
@@ -2738,7 +2738,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
  ; CGP-NEXT:    ; implicit-def: $vgpr0_vgpr1
  ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_2
+; CGP-NEXT:    s_cbranch_execz BB8_2
  ; CGP-NEXT:  ; %bb.1:
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v10
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v1, v11
@@ -2905,7 +2905,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
  ; CGP-NEXT:    ; implicit-def: $vgpr4_vgpr5
  ; CGP-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; CGP-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; CGP-NEXT:    s_cbranch_execnz BB8_6
+; CGP-NEXT:    s_cbranch_execz BB8_6
  ; CGP-NEXT:  ; %bb.5:
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v8
  ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v9
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sun, 17 May 2020 14:51:22 +0000 (10:51 -0400)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 22 May 2020 14:31:44 +0000 (10:31 -0400)
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.if.xfail.mir	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll		patch \| blob \| history