From 4511f3fa86de9579ded704b66a52492bc7ca08de Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 5 Mar 2019 12:12:18 +0000 Subject: [PATCH] [SCEV] Ensure that isHighCostExpansion takes into account what is being divided A SCEV is not low-cost just because you can divide it by a power of 2. We need to also check what we are dividing to make sure it too is not a high-code expansion. This helps to not expand the exit value of certain loops, helping not to bloat the code. The change in no-iv-rewrite.ll is reverting back to what it was testing before rL194116, and looks a lot like the other tests in replace-loop-exit-folds.ll. Differential Revision: https://reviews.llvm.org/D58435 llvm-svn: 355393 --- llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 7 ++- .../Transforms/IndVarSimplify/no-iv-rewrite.ll | 9 +-- .../IndVarSimplify/replace-loop-exit-folds.ll | 65 +++++----------------- 3 files changed, 22 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 939907c..78bb064 100644 --- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -2070,10 +2070,13 @@ bool SCEVExpander::isHighCostExpansionHelper( if (auto *UDivExpr = dyn_cast(S)) { // If the divisor is a power of two and the SCEV type fits in a native - // integer, consider the division cheap irrespective of whether it occurs in - // the user code since it can be lowered into a right shift. + // integer (and the LHS not expensive), consider the division cheap + // irrespective of whether it occurs in the user code since it can be + // lowered into a right shift. if (auto *SC = dyn_cast(UDivExpr->getRHS())) if (SC->getAPInt().isPowerOf2()) { + if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, Processed)) + return true; const DataLayout &DL = L->getHeader()->getParent()->getParent()->getDataLayout(); unsigned Width = cast(UDivExpr->getType())->getBitWidth(); diff --git a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll index ff344cb..7411b16 100644 --- a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -223,19 +223,14 @@ entry: %halfLim = ashr i32 %limit, 2 br label %loop -; This test originally checked that the OR instruction was cloned. Now the -; ScalarEvolution is able to understand the loop evolution and that '%iv' at the -; end of the loop is an even value. Thus '%val' is computed at the end of the -; loop and the OR instruction is replaced by an ADD keeping the result -; equivalent. +; Test cloning an or, which is not an OverflowBinaryOperator. ; ; CHECK: sext ; CHECK: loop: ; CHECK: phi i64 ; CHECK-NOT: sext -; CHECK: icmp slt i64 +; CHECK: or i64 ; CHECK: exit: -; CHECK: add i64 loop: %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ] %t1 = sext i32 %iv to i64 diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll index 02bf769..5bac86d3 100644 --- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll +++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll @@ -40,23 +40,16 @@ while.end: ; preds = %while.cond define i32 @used_loop(i32 %size) minsize { ; CHECK-LABEL: @used_loop( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[SIZE:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], -32 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -32 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SIZE]], [[UMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 5 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 5 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: -; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] +; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31 ; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[SIZE]], [[TMP5]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[SIZE_LCSSA:%.*]] = phi i32 [ [[SIZE_ADDR_0]], [[WHILE_COND]] ] +; CHECK-NEXT: ret i32 [[SIZE_LCSSA]] ; entry: br label %while.cond @@ -77,16 +70,9 @@ while.end: ; preds = %while.cond define i32 @test_signed_while(i32 %S) { ; CHECK-LABEL: @test_signed_while( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[S:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -32 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -32 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[S]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 5 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 5 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: -; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[S_ADDR_0]], 31 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_END:%.*]] ; CHECK: while.body: @@ -94,8 +80,8 @@ define i32 @test_signed_while(i32 %S) { ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: br label [[WHILE_COND]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[S]], [[TMP5]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i32 [ [[S_ADDR_0]], [[WHILE_COND]] ] +; CHECK-NEXT: ret i32 [[S_ADDR_0_LCSSA]] ; entry: br label %while.cond @@ -118,23 +104,16 @@ while.end: ; preds = %while.cond define i32 @test_signed_do(i32 %S) { ; CHECK-LABEL: @test_signed_do( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 15, [[S:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -16 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -16 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[S]], [[SMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], 4 ; CHECK-NEXT: br label [[DO_BODY:%.*]] ; CHECK: do.body: -; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[DO_BODY]] ] +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[DO_BODY]] ] ; CHECK-NEXT: [[SUB]] = add nsw i32 [[S_ADDR_0]], -16 ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[SUB]], 15 ; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]] ; CHECK: do.end: -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[S]], -16 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[DO_BODY]] ] +; CHECK-NEXT: ret i32 [[SUB_LCSSA]] ; entry: br label %do.body @@ -154,16 +133,9 @@ do.end: ; preds = %do.body define i32 @test_unsigned_while(i32 %S) { ; CHECK-LABEL: @test_unsigned_while( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[S:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], -16 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -16 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[S]], [[UMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: -; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[S_ADDR_0]], 15 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_END:%.*]] ; CHECK: while.body: @@ -171,8 +143,8 @@ define i32 @test_unsigned_while(i32 %S) { ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: br label [[WHILE_COND]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[S]], [[TMP5]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i32 [ [[S_ADDR_0]], [[WHILE_COND]] ] +; CHECK-NEXT: ret i32 [[S_ADDR_0_LCSSA]] ; entry: br label %while.cond @@ -195,23 +167,16 @@ while.end: ; preds = %while.cond define i32 @test_unsigned_do(i32 %S) { ; CHECK-LABEL: @test_unsigned_do( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 15, [[S:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], -16 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -16 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[S]], [[UMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], 4 ; CHECK-NEXT: br label [[DO_BODY:%.*]] ; CHECK: do.body: -; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[DO_BODY]] ] +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i32 [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[DO_BODY]] ] ; CHECK-NEXT: [[SUB]] = add i32 [[S_ADDR_0]], -16 ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], 15 ; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]] ; CHECK: do.end: -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[S]], -16 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[DO_BODY]] ] +; CHECK-NEXT: ret i32 [[SUB_LCSSA]] ; entry: br label %do.body -- 2.7.4