#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
///
/// NB! There may be conditions feeding into \p BI that aren't inductive range
/// checks, and hence don't end up in \p Checks.
- static void
- extractRangeChecksFromBranch(BranchInst *BI, Loop *L, ScalarEvolution &SE,
- BranchProbabilityInfo *BPI,
- SmallVectorImpl<InductiveRangeCheck> &Checks);
+ static void extractRangeChecksFromBranch(
+ BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
+ SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
};
struct LoopStructure;
void InductiveRangeCheck::extractRangeChecksFromBranch(
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
- SmallVectorImpl<InductiveRangeCheck> &Checks) {
+ SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
return;
+ unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
+ assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
+ "No edges coming to loop?");
BranchProbability LikelyTaken(15, 16);
if (!SkipProfitabilityChecks && BPI &&
- BPI->getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
+ BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
return;
+ // IRCE expects branch's true edge comes to loop. Invert branch for opposite
+ // case.
+ if (IndexLoopSucc != 0) {
+ IRBuilder<> Builder(BI);
+ InvertBranch(BI, Builder);
+ if (BPI)
+ BPI->swapSuccEdgesProbabilities(BI->getParent());
+ Changed = true;
+ }
+
SmallPtrSet<Value *, 8> Visited;
InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->getOperandUse(0),
Checks, Visited);
LLVMContext &Context = Preheader->getContext();
SmallVector<InductiveRangeCheck, 16> RangeChecks;
+ bool Changed = false;
for (auto *BBI : L->getBlocks())
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
- RangeChecks);
+ RangeChecks, Changed);
if (RangeChecks.empty())
- return false;
+ return Changed;
auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
OS << "irce: looking at loop "; L->print(OS);
if (!MaybeLoopStructure) {
LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
<< FailureReason << "\n";);
- return false;
+ return Changed;
}
LoopStructure LS = *MaybeLoopStructure;
if (!isProfitableToTransform(*L, LS))
- return false;
+ return Changed;
const SCEVAddRecExpr *IndVar =
cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
}
if (!SafeIterRange)
- return false;
+ return Changed;
LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, *SafeIterRange);
- bool Changed = LC.run();
- if (Changed) {
+ if (LC.run()) {
+ Changed = true;
+
auto PrintConstrainedLoopInfo = [L]() {
dbgs() << "irce: in function ";
dbgs() << L->getHeader()->getParent()->getName() << ": ";
; CHECK: guarded:
; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[INDVAR]]
; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[ADDR]], align 4
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[ZERO_LOOPEXIT_LOOPEXIT4:%.*]], label [[LATCH]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[RES]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[LATCH]], label [[ZERO_LOOPEXIT_LOOPEXIT4:%.*]]
; CHECK: latch:
; CHECK-NEXT: [[INDVAR_NEXT]] = add nuw nsw i64 [[INDVAR]], 2
; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], 3
; CHECK: guarded.postloop:
; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVAR_POSTLOOP]]
; CHECK-NEXT: [[RES_POSTLOOP:%.*]] = load i32, ptr [[ADDR_POSTLOOP]], align 4
-; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp eq i32 [[RES_POSTLOOP]], 0
-; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[ZERO_LOOPEXIT_LOOPEXIT:%.*]], label [[LATCH_POSTLOOP]]
+; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp ne i32 [[RES_POSTLOOP]], 0
+; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LATCH_POSTLOOP]], label [[ZERO_LOOPEXIT_LOOPEXIT:%.*]]
; CHECK: latch.postloop:
; CHECK-NEXT: [[INDVAR_NEXT_POSTLOOP]] = add nuw nsw i64 [[INDVAR_POSTLOOP]], 2
; CHECK-NEXT: [[RES2_POSTLOOP]] = mul i32 [[RES_POSTLOOP]], 3
}
; Same as test_09 but range check comparison is inversed.
-; TODO: IRCE is allowed.
+; IRCE is allowed.
define i32 @test_10(ptr %p, ptr %capacity_p, ptr %num_elements_p) {
; CHECK-LABEL: define i32 @test_10
; CHECK-SAME: (ptr [[P:%.*]], ptr [[CAPACITY_P:%.*]], ptr [[NUM_ELEMENTS_P:%.*]]) {
; CHECK-NEXT: [[CAPACITY:%.*]] = load i32, ptr [[CAPACITY_P]], align 4, !range [[RNG16]]
; CHECK-NEXT: [[NUM_ELEMENTS:%.*]] = load i32, ptr [[NUM_ELEMENTS_P]], align 4, !range [[RNG16]]
; CHECK-NEXT: [[LIMIT:%.*]] = sub i32 [[CAPACITY]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[CAPACITY]], -3
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[CAPACITY]], 2147483646
+; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[SMAX]]
+; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LIMIT]], i32 0)
+; CHECK-NEXT: [[SMAX2:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 -1)
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[SMAX2]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[SMIN3:%.*]] = call i32 @llvm.smin.i32(i32 [[NUM_ELEMENTS]], i32 [[TMP4]])
+; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN3]], i32 0)
+; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK: loop.preheader:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT: [[CAPACITY_CHECK:%.*]] = icmp sge i32 [[IV]], [[LIMIT]]
-; CHECK-NEXT: br i1 [[CAPACITY_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]], !prof [[PROF19:![0-9]+]]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[CAPACITY_CHECK:%.*]] = icmp slt i32 [[IV]], [[LIMIT]]
+; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof [[PROF17]]
; CHECK: backedge:
; CHECK-NEXT: [[IV_WIDE:%.*]] = zext i32 [[IV]] to i64
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i64 [[IV_WIDE]]
; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[NUM_ELEMENTS]]
-; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[IV_NEXT]], [[EXIT_MAINLOOP_AT]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK: main.exit.selector:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[IV_NEXT_LCSSA]], [[NUM_ELEMENTS]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]]
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: [[IV_COPY:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT: br label [[POSTLOOP:%.*]]
+; CHECK: exit.loopexit:
+; CHECK-NEXT: [[IV_LCSSA1_PH:%.*]] = phi i32 [ [[IV_POSTLOOP:%.*]], [[BACKEDGE_POSTLOOP:%.*]] ]
+; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ]
+; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV_LCSSA]], [[MAIN_EXIT_SELECTOR]] ], [ [[IV_LCSSA1_PH]], [[EXIT_LOOPEXIT:%.*]] ]
; CHECK-NEXT: ret i32 [[IV_LCSSA1]]
+; CHECK: out_of_bounds.loopexit:
+; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]]
+; CHECK: out_of_bounds.loopexit5:
+; CHECK-NEXT: br label [[OUT_OF_BOUNDS]]
; CHECK: out_of_bounds:
; CHECK-NEXT: ret i32 -1
+; CHECK: postloop:
+; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]]
+; CHECK: loop.postloop:
+; CHECK-NEXT: [[IV_POSTLOOP]] = phi i32 [ [[IV_COPY]], [[POSTLOOP]] ], [ [[IV_NEXT_POSTLOOP:%.*]], [[BACKEDGE_POSTLOOP]] ]
+; CHECK-NEXT: [[CAPACITY_CHECK_POSTLOOP:%.*]] = icmp slt i32 [[IV_POSTLOOP]], [[LIMIT]]
+; CHECK-NEXT: br i1 [[CAPACITY_CHECK_POSTLOOP]], label [[BACKEDGE_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof [[PROF17]]
+; CHECK: backedge.postloop:
+; CHECK-NEXT: [[IV_WIDE_POSTLOOP:%.*]] = zext i32 [[IV_POSTLOOP]] to i64
+; CHECK-NEXT: [[EL_PTR_POSTLOOP:%.*]] = getelementptr i32, ptr [[P]], i64 [[IV_WIDE_POSTLOOP]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR_POSTLOOP]], align 4
+; CHECK-NEXT: [[IV_NEXT_POSTLOOP]] = add nuw nsw i32 [[IV_POSTLOOP]], 4
+; CHECK-NEXT: [[LOOP_COND_POSTLOOP:%.*]] = icmp slt i32 [[IV_NEXT_POSTLOOP]], [[NUM_ELEMENTS]]
+; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP19:![0-9]+]], !irce.loop.clone !6
;
entry:
%capacity = load i32, ptr %capacity_p, !range !4