[SimplifyCFG] Common code sinking: fix application of profitability check

author Roman Lebedev <lebedev.ri@gmail.com>

Thu, 29 Apr 2021 16:20:06 +0000 (19:20 +0300)

committer Roman Lebedev <lebedev.ri@gmail.com>

Thu, 29 Apr 2021 18:11:40 +0000 (21:11 +0300)
author Roman Lebedev <lebedev.ri@gmail.com>
Thu, 29 Apr 2021 16:20:06 +0000 (19:20 +0300)
committer Roman Lebedev <lebedev.ri@gmail.com>
Thu, 29 Apr 2021 18:11:40 +0000 (21:11 +0300)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp

index 2369935..31c50f4 100644 (file)
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1933,6 +1933,20 @@ namespace {
        }
      }
  
+    void operator++() { // Step every entry of Insts to its next instruction.
+      if (Fail) // Already flagged as failed: leave Insts untouched.
+        return;
+      for (auto *&Inst : Insts) { // By reference: entries are updated in place.
+        for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+          Inst = Inst->getNextNode(); // Keep stepping past debug intrinsics.
+        // getNextNode() gave null: we were already at end of block.
+        if (!Inst) {
+          Fail = true; // Entries visited before this one remain advanced.
+          return;
+        }
+      }
+    }
+
      ArrayRef<Instruction*> operator * () const {
        return Insts;
      }
@@ -2005,7 +2019,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
    // carry on. If we can sink an instruction but need to PHI-merge some operands
    // (because they're not identical in each instruction) we add these to
    // PHIOperands.
-  unsigned ScanIdx = 0;
+  int ScanIdx = 0;
    SmallPtrSet<Value*,4> InstructionsToSink;
    DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
    LockstepReverseIterator LRI(UnconditionalPreds);
@@ -2041,8 +2055,24 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
  
      return NumPHIInsts <= 1;
    };
+
+  // If no instructions can be sunk, early-return.
+  if (ScanIdx == 0)
+    return false;
+
+  // We've determined that we are going to sink the last ScanIdx instructions,
+  // and recorded them in InstructionsToSink. Now, some instructions may be
+  // unprofitable to sink. But that determination depends on the instructions
+  // that we are going to sink.
+
+  // First, forward scan: find the first instruction unprofitable to sink,
+  // recording all the ones that are profitable to sink.
+  // FIXME: would it be better, after we detect that not all are profitable,
+  // to either record the profitable ones, or erase the unprofitable ones?
+  // Maybe we need to choose (at runtime) the one that will touch least instrs?
    LRI.reset();
-  unsigned Idx = 0;
+  int Idx = 0;
+  SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
@@ -2050,10 +2080,43 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
        break;
      }
+    InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
      --LRI;
      ++Idx;
    }
-  ScanIdx = Idx;
+
+  // If no instructions can be sunk, early-return.
+  if (Idx == 0)
+    return false;
+
+  // Did we determine that (only) some instructions are unprofitable to sink?
+  if (Idx < ScanIdx) {
+    // Okay, some instructions are unprofitable.
+    ScanIdx = Idx;
+    InstructionsToSink = InstructionsProfitableToSink;
+
+    // But, that may make other instructions unprofitable, too.
+    // So, do a backward scan: do any earlier instructions become unprofitable?
+    assert(!ProfitableToSinkInstruction(LRI) &&
+           "We already know that the last instruction is unprofitable to sink");
+    ++LRI;
+    --Idx;
+    while (Idx >= 0) {
+      // If we detect that an instruction becomes unprofitable to sink,
+      // all earlier instructions won't be sunk either,
+      // so preemptively keep InstructionsProfitableToSink in sync.
+      // FIXME: is this the most performant approach?
+      for (auto *I : *LRI)
+        InstructionsProfitableToSink.erase(I);
+      if (!ProfitableToSinkInstruction(LRI)) {
+        // Everything starting with this instruction won't be sunk.
+        ScanIdx = Idx;
+        InstructionsToSink = InstructionsProfitableToSink;
+      }
+      ++LRI;
+      --Idx;
+    }
+  }
  
    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
@@ -2068,7 +2131,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
-    unsigned Idx = 0;
+    int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
@@ -2102,7 +2165,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
    // sink presuming a later value will also be sunk, but stop half way through
    // and never actually sink it which means we produce more PHIs than intended.
    // This is unlikely in practice though.
-  unsigned SinkIdx = 0;
+  int SinkIdx = 0;
    for (; SinkIdx != ScanIdx; ++SinkIdx) {
      LLVM_DEBUG(dbgs() << "SINK: Sink: "
                        << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll

index a85ad8e..c996682 100644 (file)
--- a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll
@@ -1496,17 +1496,17 @@ define void @creating_too_many_phis(i1 %cond, i32 %a, i32 %b, i32 %c, i32 %d, i3
  ; CHECK-NEXT:    [[V0:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
  ; CHECK-NEXT:    [[V1:%.*]] = add i32 [[V0]], [[C:%.*]]
  ; CHECK-NEXT:    [[V2:%.*]] = add i32 [[D:%.*]], [[E:%.*]]
+; CHECK-NEXT:    [[R3:%.*]] = add i32 [[V1]], [[V2]]
  ; CHECK-NEXT:    br label [[END:%.*]]
  ; CHECK:       bb1:
  ; CHECK-NEXT:    [[V4:%.*]] = add i32 [[A]], [[B]]
  ; CHECK-NEXT:    [[V5:%.*]] = add i32 [[V4]], [[C]]
  ; CHECK-NEXT:    [[V6:%.*]] = add i32 [[G:%.*]], [[H:%.*]]
+; CHECK-NEXT:    [[R7:%.*]] = add i32 [[V5]], [[V6]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[V6_SINK:%.*]] = phi i32 [ [[V6]], [[BB1]] ], [ [[V2]], [[BB0]] ]
-; CHECK-NEXT:    [[V5_SINK:%.*]] = phi i32 [ [[V5]], [[BB1]] ], [ [[V1]], [[BB0]] ]
-; CHECK-NEXT:    [[R7:%.*]] = add i32 [[V5_SINK]], [[V6_SINK]]
-; CHECK-NEXT:    call void @use32(i32 [[R7]])
+; CHECK-NEXT:    [[R7_SINK:%.*]] = phi i32 [ [[R7]], [[BB1]] ], [ [[R3]], [[BB0]] ]
+; CHECK-NEXT:    call void @use32(i32 [[R7_SINK]])
  ; CHECK-NEXT:    ret void
  ;
    br i1 %cond, label %bb0, label %bb1
author	Roman Lebedev <lebedev.ri@gmail.com>
	Thu, 29 Apr 2021 16:20:06 +0000 (19:20 +0300)
committer	Roman Lebedev <lebedev.ri@gmail.com>
	Thu, 29 Apr 2021 18:11:40 +0000 (21:11 +0300)
llvm/lib/Transforms/Utils/SimplifyCFG.cpp		patch | blob | history
llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll		patch | blob | history