[SimplifyCFG] Handle tail-sinking of more than 2 incoming branches

author James Molloy <james.molloy@arm.com>

Thu, 1 Sep 2016 12:58:13 +0000 (12:58 +0000)

committer James Molloy <james.molloy@arm.com>

Thu, 1 Sep 2016 12:58:13 +0000 (12:58 +0000)
author James Molloy <james.molloy@arm.com>
Thu, 1 Sep 2016 12:58:13 +0000 (12:58 +0000)
committer James Molloy <james.molloy@arm.com>
Thu, 1 Sep 2016 12:58:13 +0000 (12:58 +0000)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp

index 4f568ec..81d1705 100644 (file)
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1577,20 +1577,62 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
    assert(BI1->isUnconditional());
    BasicBlock *BBEnd = BI1->getSuccessor(0);
  
-  // We currently only support branch targets with two predecessors.
-  // FIXME: this is an arbitrary restriction and should be lifted.
-  SmallVector<BasicBlock*,4> Blocks;
-  for (auto *BB : predecessors(BBEnd))
-    Blocks.push_back(BB);
-  if (Blocks.size() != 2 ||
-      !all_of(Blocks, [](const BasicBlock *BB) {
-        auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
-        return BI && BI->isUnconditional();
-      }))
+  // We support two situations:
+  //   (1) all incoming arcs are unconditional
+  //   (2) one incoming arc is conditional
+  //
+  // (2) is very common in switch defaults and
+  // else-if patterns;
+  //
+  //   if (a) f(1);
+  //   else if (b) f(2);
+  //
+  // produces:
+  //
+  //       [if]
+  //      /    \
+  //    [f(1)] [if]
+  //      |     | \
+  //      |     |  \
+  //      |  [f(2)]|
+  //       \    | /
+  //        [ end ]
+  //
+  // [end] has two unconditional predecessor arcs and one conditional. The
+  // conditional refers to the implicit empty 'else' arc. This conditional
+  // arc can also be caused by an empty default block in a switch.
+  //
+  // In this case, we attempt to sink code from all *unconditional* arcs.
+  // If we can sink instructions from these arcs (determined during the scan
+  // phase below) we insert a common successor for all unconditional arcs and
+  // connect that to [end], to enable sinking:
+  //
+  //       [if]
+  //      /    \
+  //    [x(1)] [if]
+  //      |     | \
+  //      |     |  \
+  //      |  [x(2)] |
+  //       \   /    |
+  //   [sink.split] |
+  //         \     /
+  //         [ end ]
+  //
+  SmallVector<BasicBlock*,4> UnconditionalPreds;
+  Instruction *Cond = nullptr;
+  for (auto *B : predecessors(BBEnd)) {
+    auto *T = B->getTerminator();
+    if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
+      UnconditionalPreds.push_back(B);
+    else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond)
+      Cond = T;
+    else
+      return false;
+  }
+  if (UnconditionalPreds.size() < 2)
      return false;
-
+  
    bool Changed = false;
-
    // We take a two-step approach to tail sinking. First we scan from the end of
    // each block upwards in lockstep. If the n'th instruction from the end of each
    // block can be sunk, those instructions are added to ValuesToSink and we
@@ -1600,7 +1642,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
    unsigned ScanIdx = 0;
    SmallPtrSet<Value*,4> InstructionsToSink;
    DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
-  LockstepReverseIterator LRI(Blocks);
+  LockstepReverseIterator LRI(UnconditionalPreds);
    while (LRI.isValid() &&
           canSinkInstructions(*LRI, PHIOperands)) {
      DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n");
@@ -1609,6 +1651,36 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
      --LRI;
    }
  
+  auto ProfitableToSinkLastInstruction = [&]() {
+    LRI.reset();
+    unsigned NumPHIdValues = 0;
+    for (auto *I : *LRI)
+      for (auto *V : PHIOperands[I])
+        if (InstructionsToSink.count(V) == 0)
+          ++NumPHIdValues;
+    DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+    unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
+    if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
+        NumPHIInsts++;
+    
+    return NumPHIInsts <= 1;
+  };
+
+  if (ScanIdx > 0 && Cond) {
+    // Check if we would actually sink anything first!
+    if (!ProfitableToSinkLastInstruction())
+      return false;
+    
+    DEBUG(dbgs() << "SINK: Splitting edge\n");
+    // We have a conditional edge and we're going to sink some instructions.
+    // Insert a new block postdominating all blocks we're going to sink from.
+    if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds,
+                                ".sink.split"))
+      // Edges couldn't be split.
+      return false;
+    Changed = true;
+  }
+  
    // Now that we've analyzed all potential sinking candidates, perform the
    // actual sink. We iteratively sink the last non-terminator of the source
    // blocks into their common successor unless doing so would require too
@@ -1623,32 +1695,22 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
    // This is unlikely in practice though.
    for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
      DEBUG(dbgs() << "SINK: Sink: "
-                 << *Blocks[0]->getTerminator()->getPrevNode()
+                 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                   << "\n");
  
      // Because we've sunk every instruction in turn, the current instruction to
      // sink is always at index 0.
-    LRI.reset();
-    unsigned NumPHIdValues = 0;
-    for (auto *I : *LRI)
-      for (auto *V : PHIOperands[I])
-        if (InstructionsToSink.count(V) == 0)
-          ++NumPHIdValues;
-    DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
-    unsigned NumPHIInsts = NumPHIdValues / Blocks.size();
-    if (NumPHIdValues % Blocks.size() != 0)
-      NumPHIInsts++;
-    
-    if (NumPHIInsts > 1)
+    if (!ProfitableToSinkLastInstruction()) {
        // Too many PHIs would be created.
+      DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
+    }
      
-    if (!sinkLastInstruction(Blocks))
+    if (!sinkLastInstruction(UnconditionalPreds))
        return Changed;
      NumSinkCommons++;
      Changed = true;
    }
-  
    return Changed;
  }
  
diff --git a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll

index 4635cfe..c7f213f 100644 (file)
--- a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
@@ -2,25 +2,26 @@
  ; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX
  ; <rdar://11417675>
  
-define void @sum(i32* %to) {
+define void @sum(i32 %a, i32* %to, i32 %c) {
  entry:
-  switch i32 undef, label %exit [
+  switch i32 %a, label %exit [
      i32 1, label %bb1
      i32 2, label %bb2
      i32 3, label %bb3
      i32 4, label %bb4
    ]
  bb1:
-  store i32 undef, i32* %to
+  %b = add i32 %c, 1
+  store i32 %b, i32* %to
    br label %exit
  bb2:
-  store i32 undef, i32* %to
+  store i32 2, i32* %to
    br label %exit
  bb3:
-  store i32 undef, i32* %to
+  store i32 3, i32* %to
    br label %exit
  bb4:
-  store i32 undef, i32* %to
+  store i32 4, i32* %to
    br label %exit
  exit:
    ret void
diff --git a/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll b/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll

index 12e6f2d..3ecb1d4 100644 (file)
--- a/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
+++ b/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
@@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
  
  ; Function Attrs: norecurse nounwind
  define void @foo(i32 %a, i32 %b, float* nocapture %foo_arr) #0 {
-; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}), (load 4 from %ir.arrayidx1.{{i[1-2]}})
+; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}})
  entry:
    %cmp = icmp sgt i32 %a, 0
    br i1 %cmp, label %if.then, label %if.end
diff --git a/llvm/test/CodeGen/AArch64/ifcvt-select.ll b/llvm/test/CodeGen/AArch64/ifcvt-select.ll

index 4e024d9..b4815c4 100644 (file)
--- a/llvm/test/CodeGen/AArch64/ifcvt-select.ll
+++ b/llvm/test/CodeGen/AArch64/ifcvt-select.ll
@@ -4,7 +4,7 @@
  define i32 @foo(i32 %a, i32 %b)  {
  entry:
  ;CHECK-LABEL: foo:
-;CHECK: csinc
+;CHECK: cneg
  ;CHECK-NOT: csel
    %sub = sub nsw i32 %b, %a
    %cmp10 = icmp sgt i32 %a, 0
diff --git a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll

index f66af7f..22d0584 100644 (file)
--- a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -11,9 +11,9 @@
  define void @test_i16_2cmp_signed_1() {
  ; CHECK-LABEL: test_i16_2cmp_signed_1
  ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.gt
+; CHECK-NEXT: b.lt
  ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
  entry:
    %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
    %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
@@ -39,7 +39,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
  define void @test_i16_2cmp_signed_2() {
  ; CHECK-LABEL: test_i16_2cmp_signed_2
  ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.le
+; CHECK-NEXT: b.gt
  ; CHECK-NOT: cmp
  ; CHECK: b.ge
  entry:
@@ -67,9 +67,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
  define void @test_i16_2cmp_unsigned_1() {
  ; CHECK-LABEL: test_i16_2cmp_unsigned_1
  ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.hi
+; CHECK-NEXT: b.lo
  ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
  entry:
    %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
    %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
@@ -95,7 +95,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
  define void @test_i16_2cmp_unsigned_2() {
  ; CHECK-LABEL: test_i16_2cmp_unsigned_2
  ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.ls
+; CHECK-NEXT: b.hi
  ; CHECK-NOT: cmp
  ; CHECK: b.hs
  entry:
@@ -132,9 +132,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
  define void @test_i8_2cmp_signed_1() {
  ; CHECK-LABEL: test_i8_2cmp_signed_1
  ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.gt
+; CHECK-NEXT: b.lt
  ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
  entry:
    %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
    %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
@@ -160,7 +160,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
  define void @test_i8_2cmp_signed_2() {
  ; CHECK-LABEL: test_i8_2cmp_signed_2
  ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.le
+; CHECK-NEXT: b.gt
  ; CHECK-NOT: cmp
  ; CHECK: b.ge
  entry:
@@ -188,9 +188,9 @@ if.end8:                                          ; preds = %if.else, %if.then7,
  define void @test_i8_2cmp_unsigned_1() {
  ; CHECK-LABEL: test_i8_2cmp_unsigned_1
  ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.hi
+; CHECK-NEXT: b.lo
  ; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
  entry:
    %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
    %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
@@ -216,7 +216,7 @@ if.end8:                                          ; preds = %if.else, %if.then7,
  define void @test_i8_2cmp_unsigned_2() {
  ; CHECK-LABEL: test_i8_2cmp_unsigned_2
  ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.ls
+; CHECK-NEXT: b.hi
  ; CHECK-NOT: cmp
  ; CHECK: b.hs
  entry:
diff --git a/llvm/test/MC/ARM/data-in-code.ll b/llvm/test/MC/ARM/data-in-code.ll

index 10657a3..c2194e9 100644 (file)
--- a/llvm/test/MC/ARM/data-in-code.ll
+++ b/llvm/test/MC/ARM/data-in-code.ll
@@ -9,7 +9,7 @@
  ;; Ensure that if a jump table is generated that it has Mapping Symbols
  ;; marking the data-in-code region.
  
-define void @foo(i32* %ptr) nounwind ssp {
+define void @foo(i32* %ptr, i32 %b) nounwind ssp {
    %tmp = load i32, i32* %ptr, align 4
    switch i32 %tmp, label %exit [
      i32 0, label %bb0
@@ -18,7 +18,7 @@ define void @foo(i32* %ptr) nounwind ssp {
      i32 3, label %bb3
    ]
  bb0:
-  store i32 0, i32* %ptr, align 4
+  store i32 %b, i32* %ptr, align 4
    br label %exit
  bb1:
    store i32 1, i32* %ptr, align 4
diff --git a/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll

index b08d4c1..db9080e 100644 (file)
--- a/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -451,6 +451,117 @@ if.end:
  ; CHECK-LABEL: test_crash
  ; No checks for test_crash - just ensure it doesn't crash!
  
+define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test16
+; CHECK: zext
+; CHECK-NOT: zext
+
+define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  switch i32 %flag, label %if.end [
+    i32 0, label %if.then
+    i32 1, label %if.then2
+  ]
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %entry ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test17
+; CHECK: if.then:
+; CHECK-NEXT: icmp uge
+; CHECK-NEXT: br label %[[x:.*]]
+
+; CHECK: if.then2:
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp ule
+; CHECK-NEXT: br label %[[x]]
+
+; CHECK: [[x]]:
+; CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp
+; CHECK-NEXT: %[[z:.*]] = zext i1 %[[y]]
+; CHECK-NEXT: br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: phi i8
+; CHECK-DAG: [ %[[z]], %[[x]] ]
+; CHECK-DAG: [ 0, %entry ]
+
+define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  switch i32 %flag, label %if.then3 [
+    i32 0, label %if.then
+    i32 1, label %if.then2
+  ]
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.then3:
+  %add2 = add i32 %nblks, %blksA
+  %cmp3 = icmp ule i32 %add2, %blksA
+  %frombool4 = zext i1 %cmp3 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test18
+; CHECK: if.end:
+; CHECK-NEXT: %[[x:.*]] = phi i1
+; CHECK-DAG: [ %cmp, %if.then ]
+; CHECK-DAG: [ %cmp2, %if.then2 ]
+; CHECK-DAG: [ %cmp3, %if.then3 ]
+; CHECK-NEXT: zext i1 %[[x]] to i8
+
  ; CHECK: !0 = !{!1, !1, i64 0}
  ; CHECK: !1 = !{!"float", !2}
  ; CHECK: !2 = !{!"an example type tree"}
author	James Molloy <james.molloy@arm.com>
	Thu, 1 Sep 2016 12:58:13 +0000 (12:58 +0000)
committer	James Molloy <james.molloy@arm.com>
	Thu, 1 Sep 2016 12:58:13 +0000 (12:58 +0000)
llvm/lib/Transforms/Utils/SimplifyCFG.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/arm64-jumptable.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/ifcvt-select.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll		patch \| blob \| history
llvm/test/MC/ARM/data-in-code.ll		patch \| blob \| history
llvm/test/Transforms/SimplifyCFG/sink-common-code.ll		patch \| blob \| history