From 602916d2defa95cf4bec76e458be4375dbe70e4a Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 24 Jan 2023 12:17:33 +0700 Subject: [PATCH] [IndVars] Apply more optimistic SkipLastIter for AND/OR conditions When exit by condition `C1` dominates exit by condition `C2`, and max symbolic exit count for `C1` matches those for loop, we will apply more optimistic logic to `C2` by setting `SkipLastIter` for it, meaning that it will do 1 iteration less because the dominating branch must exit on the last loop iteration. But when we have a single exit by condition `C1 & C2`, we cannot apply the same logic, because there is no dominating condition. However, if we can prove that the symbolic max exit count of `C1 & C2` matches those of `C1`, it means that for `C2` we can assume that it doesn't matter on the last iteration (because the whole thing is `false` because `C1` must be `false`). Therefore, in this situation, we can handle `C2` as if it had `SkipLastIter`. Differential Revision: https://reviews.llvm.org/D139934 Reviewed By: nikic --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 44 +++++++++++++++++++- .../X86/widening-vs-and-elimination.ll | 15 +++---- .../Transforms/IndVarSimplify/turn-to-invariant.ll | 48 +++++++++++----------- 3 files changed, 74 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index afc78e2..6e1aaba 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1465,11 +1465,47 @@ static bool optimizeLoopExitWithUnknownExitCount( LeafConditions.push_back(ICmp); } while (!Worklist.empty()); + // If the current basic block has the same exit count as the whole loop, and + // it consists of multiple icmp's, try to collect all icmp's that give exact + // same exit count. For all other icmp's, we could use one less iteration, + // because their value on the last iteration doesn't really matter. + SmallPtrSet ICmpsFailingOnLastIter; + if (!SkipLastIter && LeafConditions.size() > 1 && + SE->getExitCount(L, ExitingBB, + ScalarEvolution::ExitCountKind::SymbolicMaximum) == + MaxIter) + for (auto *ICmp : LeafConditions) { + auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted, + /*ControlsExit*/ false); + auto *ExitMax = EL.SymbolicMaxNotTaken; + if (isa(ExitMax)) + continue; + // They could be of different types (specifically this happens after + // IV widening). + auto *WiderType = + SE->getWiderType(ExitMax->getType(), MaxIter->getType()); + auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType); + auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType); + if (WideExitMax == WideMaxIter) + ICmpsFailingOnLastIter.insert(ICmp); + } + bool Changed = false; - for (auto *OldCond : LeafConditions) + for (auto *OldCond : LeafConditions) { + // Skip last iteration for this icmp under one of two conditions: + // - We do it for all conditions; + // - There is another ICmp that would fail on last iter, so this one doesn't + // really matter. + bool OptimisticSkipLastIter = SkipLastIter; + if (!OptimisticSkipLastIter) { + if (ICmpsFailingOnLastIter.size() > 1) + OptimisticSkipLastIter = true; + else if (ICmpsFailingOnLastIter.size() == 1) + OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond); + } if (auto Replaced = createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted, - SkipLastIter, SE, Rewriter)) { + OptimisticSkipLastIter, SE, Rewriter)) { Changed = true; auto *NewCond = *Replaced; if (auto *NCI = dyn_cast(NewCond)) { @@ -1481,7 +1517,11 @@ static bool optimizeLoopExitWithUnknownExitCount( assert(OldCond->hasOneUse() && "Must be!"); OldCond->replaceAllUsesWith(NewCond); DeadInsts.push_back(OldCond); + // Make sure we no longer consider this condition as failing on last + // iteration. + ICmpsFailingOnLastIter.erase(OldCond); } + } return Changed; } diff --git a/llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll b/llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll index fad21e9..ba8f4f0 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll @@ -9,15 +9,15 @@ define void @test_01(i32 %start, i32 %limit) { ; WIDENING_ON-LABEL: @test_01( ; WIDENING_ON-NEXT: bb: ; WIDENING_ON-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 +; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1 +; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64 ; WIDENING_ON-NEXT: br label [[LOOP:%.*]] ; WIDENING_ON: loop: ; WIDENING_ON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[BB:%.*]] ] -; WIDENING_ON-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; WIDENING_ON-NEXT: [[NOT_ZERO:%.*]] = icmp ne i64 [[INDVARS_IV]], 0 -; WIDENING_ON-NEXT: [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64 -; WIDENING_ON-NEXT: [[RANGE_CHECK_WIDE:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] -; WIDENING_ON-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE]] +; WIDENING_ON-NEXT: [[RANGE_CHECK_WIDE_FIRST_ITER:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]] +; WIDENING_ON-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE_FIRST_ITER]] ; WIDENING_ON-NEXT: br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]] ; WIDENING_ON: backedge: ; WIDENING_ON-NEXT: br label [[LOOP]] @@ -26,13 +26,14 @@ define void @test_01(i32 %start, i32 %limit) { ; ; WIDENING_OFF-LABEL: @test_01( ; WIDENING_OFF-NEXT: bb: +; WIDENING_OFF-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; WIDENING_OFF-NEXT: br label [[LOOP:%.*]] ; WIDENING_OFF: loop: -; WIDENING_OFF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[START:%.*]], [[BB:%.*]] ] +; WIDENING_OFF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[START]], [[BB:%.*]] ] ; WIDENING_OFF-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 ; WIDENING_OFF-NEXT: [[NOT_ZERO:%.*]] = icmp ne i32 [[IV]], 0 -; WIDENING_OFF-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LIMIT:%.*]] -; WIDENING_OFF-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK]] +; WIDENING_OFF-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LIMIT:%.*]] +; WIDENING_OFF-NEXT: [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_FIRST_ITER]] ; WIDENING_OFF-NEXT: br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]] ; WIDENING_OFF: backedge: ; WIDENING_OFF-NEXT: [[ZEXT:%.*]] = zext i32 [[IV_NEXT]] to i64 diff --git a/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll b/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll index d6ef997..809468f 100644 --- a/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll +++ b/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll @@ -536,20 +536,20 @@ failed_2: ret i32 -2 } -; TODO: This test is equivalent to @test_simple_case, with only difference -; that both checks are merged together into one 'and' check. This -; should not prevent turning the range check into invariant. -; https://alive2.llvm.org/ce/z/G-2ERB +; This test is equivalent to @test_simple_case, with only difference +; that both checks are merged together into one 'and' check. This +; should not prevent turning the range check into invariant. +; https://alive2.llvm.org/ce/z/G-2ERB define i32 @test_and_conditions(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp ne i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = and i1 [[ZERO_CHECK]], [[RANGE_CHECK]] +; CHECK-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = and i1 [[ZERO_CHECK]], [[RANGE_CHECK_FIRST_ITER]] ; CHECK-NEXT: br i1 [[BOTH_CHECKS]], label [[BACKEDGE]], label [[FAILED:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -584,18 +584,18 @@ failed: ret i32 -3 } -; TODO: Same as test_and_conditions, but swapped exit block branches, -; and condition is expressed as OR. Still optimizable. +; Same as test_and_conditions, but swapped exit block branches, +; and condition is expressed as OR. Still optimizable. define i32 @test_and_conditions_inverse(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_inverse( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK_FAILED:%.*]] = icmp eq i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK_FAILED:%.*]] = icmp uge i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[EITHER_CHECK:%.*]] = or i1 [[ZERO_CHECK_FAILED]], [[RANGE_CHECK_FAILED]] +; CHECK-NEXT: [[RANGE_CHECK_FAILED_FIRST_ITER:%.*]] = icmp uge i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[EITHER_CHECK:%.*]] = or i1 [[ZERO_CHECK_FAILED]], [[RANGE_CHECK_FAILED_FIRST_ITER]] ; CHECK-NEXT: br i1 [[EITHER_CHECK]], label [[FAILED:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -752,17 +752,17 @@ failed_2: ret i32 -2 } -; TODO: Same as test_and_conditions, but with logical AND. +; Same as test_and_conditions, but with logical AND. define i32 @test_and_conditions_logical_and(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_logical_and( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp ne i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = select i1 [[ZERO_CHECK]], i1 [[RANGE_CHECK]], i1 false +; CHECK-NEXT: [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[BOTH_CHECKS:%.*]] = select i1 [[ZERO_CHECK]], i1 [[RANGE_CHECK_FIRST_ITER]], i1 false ; CHECK-NEXT: br i1 [[BOTH_CHECKS]], label [[BACKEDGE]], label [[FAILED:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 @@ -797,17 +797,17 @@ failed: ret i32 -3 } -; TODO: Same as test_and_conditions_inverse, but with logical OR. +; Same as test_and_conditions_inverse, but with logical OR. define i32 @test_and_conditions_inverse_logical_or(i32 %start, i32 %len) { ; CHECK-LABEL: @test_and_conditions_inverse_logical_or( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START:%.*]], -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[ZERO_CHECK_FAILED:%.*]] = icmp eq i32 [[IV]], 0 -; CHECK-NEXT: [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RANGE_CHECK_FAILED:%.*]] = icmp uge i32 [[IV_MINUS_1]], [[LEN:%.*]] -; CHECK-NEXT: [[EITHER_CHECK:%.*]] = select i1 [[ZERO_CHECK_FAILED]], i1 true, i1 [[RANGE_CHECK_FAILED]] +; CHECK-NEXT: [[RANGE_CHECK_FAILED_FIRST_ITER:%.*]] = icmp uge i32 [[TMP0]], [[LEN:%.*]] +; CHECK-NEXT: [[EITHER_CHECK:%.*]] = select i1 [[ZERO_CHECK_FAILED]], i1 true, i1 [[RANGE_CHECK_FAILED_FIRST_ITER]] ; CHECK-NEXT: br i1 [[EITHER_CHECK]], label [[FAILED:%.*]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 -- 2.7.4