StructurizeCFG: simplify phi nodes when possible

author Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>

Thu, 5 Mar 2020 03:39:46 +0000 (09:09 +0530)

committer Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>

Thu, 5 Mar 2020 05:03:15 +0000 (10:33 +0530)
author Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>
Thu, 5 Mar 2020 03:39:46 +0000 (09:09 +0530)
committer Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>
Thu, 5 Mar 2020 05:03:15 +0000 (10:33 +0530)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

index 4ce4ce4..15aad80 100644 (file)
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -34,6 +34,7 @@
  #include "llvm/IR/Use.h"
  #include "llvm/IR/User.h"
  #include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
  #include "llvm/InitializePasses.h"
  #include "llvm/Pass.h"
  #include "llvm/Support/Casting.h"
@@ -197,6 +198,7 @@ class StructurizeCFG : public RegionPass {
    SmallVector<RegionNode *, 8> Order;
    BBSet Visited;
  
+  SmallVector<WeakVH, 8> AffectedPhis;
    BBPhiMap DeletedPhis;
    BB2BBVecMap AddedPhis;
  
@@ -232,6 +234,8 @@ class StructurizeCFG : public RegionPass {
  
    void setPhiValues();
  
+  void simplifyAffectedPhis();
+
    void killTerminator(BasicBlock *BB);
  
    void changeExit(RegionNode *Node, BasicBlock *NewExit,
@@ -585,9 +589,14 @@ void StructurizeCFG::insertConditions(bool Loops) {
  void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
    PhiMap &Map = DeletedPhis[To];
    for (PHINode &Phi : To->phis()) {
+    bool Recorded = false;
      while (Phi.getBasicBlockIndex(From) != -1) {
        Value *Deleted = Phi.removeIncomingValue(From, false);
        Map[&Phi].push_back(std::make_pair(From, Deleted));
+      if (!Recorded) {
+        AffectedPhis.push_back(&Phi);
+        Recorded = true;
+      }
      }
    }
  }
@@ -632,28 +641,29 @@ void StructurizeCFG::setPhiValues() {
  
        for (BasicBlock *FI : From)
          Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
+      AffectedPhis.push_back(Phi);
      }
  
      DeletedPhis.erase(To);
    }
    assert(DeletedPhis.empty());
  
-  // Simplify any phis inserted by the SSAUpdater if possible
+  AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
+}
+
+void StructurizeCFG::simplifyAffectedPhis() {
    bool Changed;
    do {
      Changed = false;
-
      SimplifyQuery Q(Func->getParent()->getDataLayout());
      Q.DT = DT;
-    for (size_t i = 0; i < InsertedPhis.size(); ++i) {
-      PHINode *Phi = InsertedPhis[i];
-      if (Value *V = SimplifyInstruction(Phi, Q)) {
-        Phi->replaceAllUsesWith(V);
-        Phi->eraseFromParent();
-        InsertedPhis[i] = InsertedPhis.back();
-        InsertedPhis.pop_back();
-        i--;
-        Changed = true;
+    for (WeakVH VH : AffectedPhis) {
+      if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
+        if (auto NewValue = SimplifyInstruction(Phi, Q)) {
+          Phi->replaceAllUsesWith(NewValue);
+          Phi->eraseFromParent();
+          Changed = true;
+        }
        }
      }
    } while (Changed);
@@ -886,6 +896,7 @@ void StructurizeCFG::createFlow() {
    BasicBlock *Exit = ParentRegion->getExit();
    bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
  
+  AffectedPhis.clear();
    DeletedPhis.clear();
    AddedPhis.clear();
    Conditions.clear();
@@ -1044,6 +1055,7 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
    insertConditions(false);
    insertConditions(true);
    setPhiValues();
+  simplifyAffectedPhis();
    rebuildSSA();
  
    // Cleanup
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll

index b9788e8..6e2868a 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -91,8 +91,8 @@ define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    br label [[BB1:%.*]]
  ; OPT:       bb1:
  ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
-; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
-; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
+; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
+; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
  ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
  ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
  ; OPT:       bb4:
@@ -100,7 +100,6 @@ define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
  ; OPT-NEXT:    br label [[FLOW]]
  ; OPT:       Flow:
-; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
  ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
  ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
  ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
@@ -185,8 +184,8 @@ define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    br label [[BB1:%.*]]
  ; OPT:       bb1:
  ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
-; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
-; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
+; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
+; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
  ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
  ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
  ; OPT:       bb4:
@@ -194,7 +193,6 @@ define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
  ; OPT-NEXT:    br label [[FLOW]]
  ; OPT:       Flow:
-; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
  ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
  ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
  ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
@@ -278,8 +276,8 @@ define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    br label [[BB1:%.*]]
  ; OPT:       bb1:
  ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
-; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
-; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
+; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
+; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
  ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
  ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
  ; OPT:       bb4:
@@ -287,7 +285,6 @@ define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
  ; OPT-NEXT:    br label [[FLOW]]
  ; OPT:       Flow:
-; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
  ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
  ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
  ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
@@ -367,8 +364,8 @@ define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    br label [[BB1:%.*]]
  ; OPT:       bb1:
  ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
-; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
-; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
+; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
+; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
  ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
  ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
  ; OPT:       bb4:
@@ -376,7 +373,6 @@ define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
  ; OPT-NEXT:    br label [[FLOW]]
  ; OPT:       Flow:
-; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
  ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
  ; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
  ; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
@@ -459,8 +455,8 @@ define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    br label [[BB1:%.*]]
  ; OPT:       bb1:
  ; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
-; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
-; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
+; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
+; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
  ; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
  ; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
  ; OPT:       bb4:
@@ -468,7 +464,6 @@ define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
  ; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
  ; OPT-NEXT:    br label [[FLOW]]
  ; OPT:       Flow:
-; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
  ; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
  ; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
  ; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll

index e671227..94d273d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -48,8 +48,8 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
  ; IR:       bb4:
  ; IR-NEXT:    br label [[FLOW:%.*]]
  ; IR:       bb5:
-; IR-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP3:%.*]], [[BB10:%.*]] ], [ 0, [[BB:%.*]] ]
-; IR-NEXT:    [[MY_TMP6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[MY_TMP11:%.*]], [[BB10]] ]
+; IR-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP6:%.*]], [[BB10:%.*]] ], [ 0, [[BB:%.*]] ]
+; IR-NEXT:    [[MY_TMP6:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP5:%.*]], [[BB10]] ]
  ; IR-NEXT:    [[MY_TMP7:%.*]] = icmp eq i32 [[MY_TMP6]], 1
  ; IR-NEXT:    [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[MY_TMP7]])
  ; IR-NEXT:    [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
@@ -60,15 +60,13 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
  ; IR:       bb9:
  ; IR-NEXT:    br i1 false, label [[BB3:%.*]], label [[BB9:%.*]]
  ; IR:       bb10:
-; IR-NEXT:    [[MY_TMP11]] = phi i32 [ [[TMP6:%.*]], [[FLOW]] ]
-; IR-NEXT:    [[MY_TMP12:%.*]] = phi i1 [ [[TMP5:%.*]], [[FLOW]] ]
-; IR-NEXT:    [[TMP3]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP12]], i64 [[PHI_BROKEN]])
-; IR-NEXT:    [[TMP4:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP3]])
-; IR-NEXT:    br i1 [[TMP4]], label [[BB23:%.*]], label [[BB5]]
+; IR-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP6]])
+; IR-NEXT:    br i1 [[TMP3]], label [[BB23:%.*]], label [[BB5]]
  ; IR:       Flow:
-; IR-NEXT:    [[TMP5]] = phi i1 [ [[MY_TMP22:%.*]], [[BB4]] ], [ true, [[BB5]] ]
-; IR-NEXT:    [[TMP6]] = phi i32 [ [[MY_TMP21:%.*]], [[BB4]] ], [ undef, [[BB5]] ]
+; IR-NEXT:    [[TMP4:%.*]] = phi i1 [ [[MY_TMP22:%.*]], [[BB4]] ], [ true, [[BB5]] ]
+; IR-NEXT:    [[TMP5]] = phi i32 [ [[MY_TMP21:%.*]], [[BB4]] ], [ undef, [[BB5]] ]
  ; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
+; IR-NEXT:    [[TMP6]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN]])
  ; IR-NEXT:    br label [[BB10]]
  ; IR:       bb13:
  ; IR-NEXT:    [[MY_TMP14:%.*]] = phi i1 [ [[MY_TMP22]], [[BB3]] ], [ true, [[BB8]] ]
@@ -84,7 +82,7 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
  ; IR-NEXT:    [[MY_TMP22]] = phi i1 [ false, [[BB16]] ], [ [[MY_TMP14]], [[BB13]] ]
  ; IR-NEXT:    br label [[BB9]]
  ; IR:       bb23:
-; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP3]])
+; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
  ; IR-NEXT:    ret void
  ;
  bb:
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll

index e075991..42462b7 100644 (file)
--- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
@@ -28,7 +28,7 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
  ; CHECK-NEXT:    [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4
  ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
  ; CHECK:       LOOP.HEADER:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[FLOW3:%.*]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[FLOW3:%.*]] ]
  ; CHECK-NEXT:    call void asm sideeffect "s_nop 0x100b
  ; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[I]] to i64
  ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]]
@@ -50,8 +50,8 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
  ; CHECK-NEXT:    [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52
  ; CHECK-NEXT:    br label [[INNER_LOOP:%.*]]
  ; CHECK:       Flow2:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP9:%.*]], [[FLOW]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP11:%.*]], [[FLOW]] ]
+; CHECK-NEXT:    [[TMP4]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP8:%.*]], [[FLOW]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP10:%.*]], [[FLOW]] ]
  ; CHECK-NEXT:    br i1 [[TMP5]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]]
  ; CHECK:       INNER_LOOP:
  ; CHECK-NEXT:    [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ]
@@ -68,28 +68,26 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
  ; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[LOAD13]], true
  ; CHECK-NEXT:    br i1 [[TMP6]], label [[INCREMENT_I]], label [[FLOW1:%.*]]
  ; CHECK:       Flow3:
-; CHECK-NEXT:    [[TMP7]] = phi i32 [ [[I_FINAL:%.*]], [[END_ELSE_BLOCK]] ], [ undef, [[FLOW2]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ]
-; CHECK-NEXT:    br i1 [[TMP8]], label [[FLOW4:%.*]], label [[LOOP_HEADER]]
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[FLOW4:%.*]], label [[LOOP_HEADER]]
  ; CHECK:       Flow4:
-; CHECK-NEXT:    br i1 [[TMP10:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    br i1 [[TMP9:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]]
  ; CHECK:       bb64:
  ; CHECK-NEXT:    call void asm sideeffect "s_nop 42", "~{memory}"() #0
  ; CHECK-NEXT:    br label [[RETURN]]
  ; CHECK:       Flow:
-; CHECK-NEXT:    [[TMP9]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[TMP10]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[TMP11]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[TMP12:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ]
-; CHECK-NEXT:    br i1 [[TMP12]], label [[BB18]], label [[FLOW2]]
+; CHECK-NEXT:    [[TMP8]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP9]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP10]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[BB18]], label [[FLOW2]]
  ; CHECK:       INCREMENT_I:
  ; CHECK-NEXT:    [[INC_I]] = add i32 [[I]], 1
  ; CHECK-NEXT:    call void asm sideeffect "s_nop 0x1336
  ; CHECK-NEXT:    br label [[FLOW1]]
  ; CHECK:       END_ELSE_BLOCK:
-; CHECK-NEXT:    [[I_FINAL]] = phi i32 [ [[TMP4]], [[FLOW2]] ]
  ; CHECK-NEXT:    call void asm sideeffect "s_nop 0x1337
-; CHECK-NEXT:    [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[I_FINAL]], -1
+; CHECK-NEXT:    [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP4]], -1
  ; CHECK-NEXT:    br label [[FLOW3]]
  ; CHECK:       RETURN:
  ; CHECK-NEXT:    call void asm sideeffect "s_nop 0x99
author	Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>
	Thu, 5 Mar 2020 03:39:46 +0000 (09:09 +0530)
committer	Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>
	Thu, 5 Mar 2020 05:03:15 +0000 (10:33 +0530)
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp		patch | blob | history
llvm/test/CodeGen/AMDGPU/loop_break.ll		patch | blob | history
llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll		patch | blob | history
llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll		patch | blob | history