From: Nikita Popov Date: Tue, 10 May 2022 10:07:20 +0000 (+0200) Subject: [ValueTracking] Enable -branch-on-poison-as-ub by default X-Git-Tag: upstream/15.0.7~6224 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=03aceab08bc9439e454d827bd1ffc94659c2c510;p=platform%2Fupstream%2Fllvm.git [ValueTracking] Enable -branch-on-poison-as-ub by default Now that SimpleLoopUnswitch and other transforms no longer introduce branch on poison, enable the -branch-on-poison-as-ub option by default. The practical impact of this is mostly better flag preservation in SCEV, and some freeze instructions no longer being necessary. Differential Revision: https://reviews.llvm.org/D125299 --- diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 04752c4..b27ceee 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -84,13 +84,12 @@ static cl::opt DomConditionsMaxUses("dom-conditions-max-uses", // According to the LangRef, branching on a poison condition is absolutely // immediate full UB. However, historically we haven't implemented that -// consistently as we have an important transformation (non-trivial unswitch) -// which introduces instances of branch on poison/undef to otherwise well -// defined programs. This flag exists to let us test optimization benefit -// of exploiting the specified behavior (in combination with enabling the -// unswitch fix.) +// consistently as we had an important transformation (non-trivial unswitch) +// which introduced instances of branch on poison/undef to otherwise well +// defined programs. This issue has since been fixed, but the flag is +// temporarily retained to easily diagnose potential regressions. static cl::opt BranchOnPoisonAsUB("branch-on-poison-as-ub", - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); /// Returns the bitwidth of the given scalar or pointer type. For vector types, diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll index bd8e37b..bfbccde 100644 --- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll +++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll @@ -1099,7 +1099,7 @@ define void @test-shl-nsw(float* %input, i32 %start, i32 %numIterations) { ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 (256 * %start) to i64)) + %input),+,1024}<%loop> U: full-set S: full-set Exits: ((4 * (sext i32 (256 * %start) to i64)) + (1024 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 -; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test-shl-nsw ; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %start) + %numIterations) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 @@ -1138,7 +1138,7 @@ define void @test-shl-nuw-edgecase(float* %input, i32 %start, i32 %numIterations ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> ((4 * (sext i32 {(-2147483648 * %start),+,-2147483648}<%loop> to i64)) + %input) U: full-set S: full-set Exits: ((4 * (sext i32 (-2147483648 + (-2147483648 * %numIterations)) to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 -; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test-shl-nuw-edgecase ; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %start) + %numIterations) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 @@ -1177,7 +1177,7 @@ define void @test-shl-nuw-nsw(float* %input, i32 %start, i32 %numIterations) { ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 (-2147483648 * %start) to i64)) + %input),+,-8589934592}<%loop> U: full-set S: full-set Exits: ((4 * (sext i32 (-2147483648 * %start) to i64)) + (-8589934592 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 -; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test-shl-nuw-nsw ; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %start) + %numIterations) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 @@ -1216,7 +1216,7 @@ define void @test-shl-no-nsw(float* %input, i32 %start, i32 %numIterations) { ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> ((4 * (sext i32 {(-2147483648 * %start),+,-2147483648}<%loop> to i64)) + %input) U: full-set S: full-set Exits: ((4 * (sext i32 (-2147483648 + (-2147483648 * %numIterations)) to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 -; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test-shl-no-nsw ; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %start) + %numIterations) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 @@ -1255,7 +1255,7 @@ define void @test-shl-nsw-edgecase(float* %input, i32 %start, i32 %numIterations ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 (1073741824 * %start) to i64)) + %input),+,4294967296}<%loop> U: full-set S: full-set Exits: ((4 * (sext i32 (1073741824 * %start) to i64)) + (4294967296 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 -; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test-shl-nsw-edgecase ; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %start) + %numIterations) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 @@ -1332,7 +1332,7 @@ define void @test-sub-no-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterat ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 %start to i64)) + (-4 * (sext i32 %sub to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + (4 * (sext i32 %start to i64)) + (-4 * (sext i32 %sub to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 -; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test-sub-no-nsw ; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %start) + %numIterations) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 @@ -1373,7 +1373,7 @@ define void @test-sub-nsw(float* %input, i32 %start, i32 %sub, i32 %numIteration ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 %start to i64)) + (-4 * (sext i32 %halfsub to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + (4 * (sext i32 %start to i64)) + (-4 * (sext i32 %halfsub to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 -; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: %numIterations LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test-sub-nsw ; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %start) + %numIterations) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 diff --git a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll index 70d87ef..bf7d2ae 100644 --- a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll +++ b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll @@ -59,7 +59,7 @@ define void @test_mul(i32 %a, i32 %b) { define void @test_sext(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-LABEL: @test_sext ; CHECK: %se2 = sext i64 %iv2.inc to i128 -; CHECK-NEXT: --> {(1 + (sext i64 {(sext i32 (1 + %a) to i64),+,1}<%loop> to i128)),+,1}<%loop2> +; CHECK-NEXT: --> {(1 + (sext i64 {(sext i32 (1 + %a) to i64),+,1}<%loop> to i128)),+,1}<%loop2> entry: br label %loop diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll index 5cd7f6d..3dc21e8 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll @@ -123,9 +123,9 @@ define void @pointer_iv_nowrap_guard(i32* %startptr, i32* %endptr) local_unnamed ; CHECK-LABEL: 'pointer_iv_nowrap_guard' ; CHECK-NEXT: Classifying expressions for: @pointer_iv_nowrap_guard ; CHECK-NEXT: %init = getelementptr inbounds i32, i32* %startptr, i64 2000 -; CHECK-NEXT: --> (8000 + %startptr) U: full-set S: full-set +; CHECK-NEXT: --> (8000 + %startptr) U: [8000,0) S: [8000,0) ; CHECK-NEXT: %iv = phi i32* [ %init, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {(8000 + %startptr),+,4}<%loop> U: full-set S: full-set Exits: (8000 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(8000 + %startptr),+,4}<%loop> U: [8000,0) S: [8000,0) Exits: (8000 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr inbounds i32, i32* %iv, i64 1 ; CHECK-NEXT: --> {(8004 + %startptr),+,4}<%loop> U: full-set S: full-set Exits: (8004 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4)) + %startptr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @pointer_iv_nowrap_guard diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll index f1b2c8e..fec36cf 100644 --- a/llvm/test/Analysis/ScalarEvolution/nsw.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll @@ -107,17 +107,19 @@ define void @test3(i32* %begin, i32* %end) nounwind ssp { ; CHECK-LABEL: 'test3' ; CHECK-NEXT: Classifying expressions for: @test3 ; CHECK-NEXT: %indvar.i.i = phi i64 [ %tmp, %for.body.i.i ], [ 0, %entry ] -; CHECK-NEXT: --> {0,+,1}<%for.body.i.i> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: <> LoopDispositions: { %for.body.i.i: Computable } +; CHECK-NEXT: --> {0,+,1}<%for.body.i.i> U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: %tmp = add nsw i64 %indvar.i.i, 1 -; CHECK-NEXT: --> {1,+,1}<%for.body.i.i> U: [1,-9223372036854775808) S: [1,-9223372036854775808) Exits: <> LoopDispositions: { %for.body.i.i: Computable } +; CHECK-NEXT: --> {1,+,1}<%for.body.i.i> U: [1,4611686018427387905) S: [1,4611686018427387905) Exits: (1 + ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: %ptrincdec.i.i = getelementptr inbounds i32, i32* %begin, i64 %tmp -; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body.i.i: Computable } +; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) + %begin) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: %__first.addr.08.i.i = getelementptr inbounds i32, i32* %begin, i64 %indvar.i.i -; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body.i.i: Computable } +; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)) + %begin) LoopDispositions: { %for.body.i.i: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test3 -; CHECK-NEXT: Loop %for.body.i.i: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body.i.i: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %for.body.i.i: Unpredictable predicated backedge-taken count. +; CHECK-NEXT: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4) +; CHECK-NEXT: Loop %for.body.i.i: max backedge-taken count is 4611686018427387903 +; CHECK-NEXT: Loop %for.body.i.i: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body.i.i: Trip multiple is 1 ; entry: %cmp7.i.i = icmp eq i32* %begin, %end @@ -165,21 +167,21 @@ define i32 @PR12375(i32* readnone %arg) { ; CHECK-LABEL: 'PR12375' ; CHECK-NEXT: Classifying expressions for: @PR12375 ; CHECK-NEXT: %tmp = getelementptr inbounds i32, i32* %arg, i64 2 -; CHECK-NEXT: --> (8 + %arg) U: full-set S: full-set +; CHECK-NEXT: --> (8 + %arg) U: [8,0) S: [8,0) ; CHECK-NEXT: %tmp2 = phi i32* [ %arg, %bb ], [ %tmp5, %bb1 ] -; CHECK-NEXT: --> {%arg,+,4}<%bb1> U: full-set S: full-set Exits: ((4 * ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (8 + (ptrtoint i32* %arg to i64)))) /u 4)) + %arg) LoopDispositions: { %bb1: Computable } +; CHECK-NEXT: --> {%arg,+,4}<%bb1> U: full-set S: full-set Exits: (4 + %arg) LoopDispositions: { %bb1: Computable } ; CHECK-NEXT: %tmp3 = phi i32 [ 0, %bb ], [ %tmp4, %bb1 ] -; CHECK-NEXT: --> {0,+,1}<%bb1> U: [0,-2147483648) S: [0,-2147483648) Exits: (trunc i64 ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (8 + (ptrtoint i32* %arg to i64)))) /u 4) to i32) LoopDispositions: { %bb1: Computable } +; CHECK-NEXT: --> {0,+,1}<%bb1> U: [0,-2147483648) S: [0,-2147483648) Exits: 1 LoopDispositions: { %bb1: Computable } ; CHECK-NEXT: %tmp4 = add nsw i32 %tmp3, 1 -; CHECK-NEXT: --> {1,+,1}<%bb1> U: [1,0) S: [1,0) Exits: (1 + (trunc i64 ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (8 + (ptrtoint i32* %arg to i64)))) /u 4) to i32)) LoopDispositions: { %bb1: Computable } +; CHECK-NEXT: --> {1,+,1}<%bb1> U: [1,0) S: [1,0) Exits: 2 LoopDispositions: { %bb1: Computable } ; CHECK-NEXT: %tmp5 = getelementptr inbounds i32, i32* %tmp2, i64 1 -; CHECK-NEXT: --> {(4 + %arg),+,4}<%bb1> U: full-set S: full-set Exits: (4 + (4 * ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (8 + (ptrtoint i32* %arg to i64)))) /u 4)) + %arg) LoopDispositions: { %bb1: Computable } +; CHECK-NEXT: --> {(4 + %arg),+,4}<%bb1> U: [4,0) S: [4,0) Exits: (8 + %arg) LoopDispositions: { %bb1: Computable } ; CHECK-NEXT: Determining loop execution counts for: @PR12375 -; CHECK-NEXT: Loop %bb1: backedge-taken count is ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (8 + (ptrtoint i32* %arg to i64)))) /u 4) -; CHECK-NEXT: Loop %bb1: max backedge-taken count is 1, actual taken count either this or zero. -; CHECK-NEXT: Loop %bb1: Predicated backedge-taken count is ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (8 + (ptrtoint i32* %arg to i64)))) /u 4) +; CHECK-NEXT: Loop %bb1: backedge-taken count is 1 +; CHECK-NEXT: Loop %bb1: max backedge-taken count is 1 +; CHECK-NEXT: Loop %bb1: Predicated backedge-taken count is 1 ; CHECK-NEXT: Predicates: -; CHECK: Loop %bb1: Trip multiple is 1 +; CHECK: Loop %bb1: Trip multiple is 2 ; bb: %tmp = getelementptr inbounds i32, i32* %arg, i64 2 @@ -201,13 +203,13 @@ define void @PR12376(i32* nocapture %arg, i32* nocapture %arg1) { ; CHECK-LABEL: 'PR12376' ; CHECK-NEXT: Classifying expressions for: @PR12376 ; CHECK-NEXT: %tmp = phi i32* [ %arg, %bb ], [ %tmp4, %bb2 ] -; CHECK-NEXT: --> {%arg,+,4}<%bb2> U: full-set S: full-set Exits: ((4 * ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4)) + %arg) LoopDispositions: { %bb2: Computable } +; CHECK-NEXT: --> {%arg,+,4}<%bb2> U: full-set S: full-set Exits: ((4 * ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4)) + %arg) LoopDispositions: { %bb2: Computable } ; CHECK-NEXT: %tmp4 = getelementptr inbounds i32, i32* %tmp, i64 1 -; CHECK-NEXT: --> {(4 + %arg),+,4}<%bb2> U: full-set S: full-set Exits: (4 + (4 * ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4)) + %arg) LoopDispositions: { %bb2: Computable } +; CHECK-NEXT: --> {(4 + %arg),+,4}<%bb2> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4)) + %arg) LoopDispositions: { %bb2: Computable } ; CHECK-NEXT: Determining loop execution counts for: @PR12376 -; CHECK-NEXT: Loop %bb2: backedge-taken count is ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4) -; CHECK-NEXT: Loop %bb2: max backedge-taken count is 4611686018427387903 -; CHECK-NEXT: Loop %bb2: Predicated backedge-taken count is ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4) +; CHECK-NEXT: Loop %bb2: backedge-taken count is ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4) +; CHECK-NEXT: Loop %bb2: max backedge-taken count is 4611686018427387902 +; CHECK-NEXT: Loop %bb2: Predicated backedge-taken count is ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4) ; CHECK-NEXT: Predicates: ; CHECK: Loop %bb2: Trip multiple is 1 ; @@ -270,21 +272,21 @@ define void @test4(i32 %arg) { ; CHECK-NEXT: %array = alloca [10 x i32], align 4 ; CHECK-NEXT: --> %array U: [0,-3) S: [-9223372036854775808,9223372036854775805) ; CHECK-NEXT: %index = phi i32 [ %inc5, %for.body ], [ %arg, %entry ] -; CHECK-NEXT: --> {%arg,+,1}<%for.body> U: full-set S: full-set Exits: (-1 + (10 smax (1 + %arg))) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {%arg,+,1}<%for.body> U: full-set S: full-set Exits: (-1 + (10 smax (1 + %arg))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %sub = add nsw i32 %index, -2 -; CHECK-NEXT: --> {(-2 + %arg),+,1}<%for.body> U: full-set S: full-set Exits: (-3 + (10 smax (1 + %arg))) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(-2 + %arg),+,1}<%for.body> U: full-set S: full-set Exits: (-3 + (10 smax (1 + %arg))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %idxprom = sext i32 %sub to i64 -; CHECK-NEXT: --> {(-2 + (sext i32 %arg to i64)),+,1}<%for.body> U: [-2147483650,4294967304) S: [-2147483650,4294967304) Exits: (-2 + (zext i32 (-1 + (-1 * %arg) + (10 smax (1 + %arg))) to i64) + (sext i32 %arg to i64)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(-2 + (sext i32 %arg to i64)),+,1}<%for.body> U: [-2147483650,4294967304) S: [-2147483650,4294967304) Exits: (-2 + (zext i32 (-1 + (-1 * %arg) + (10 smax (1 + %arg))) to i64) + (sext i32 %arg to i64)) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %array, i64 0, i64 %idxprom -; CHECK-NEXT: --> {(-8 + (4 * (sext i32 %arg to i64)) + %array),+,4}<%for.body> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-8 + (4 * (zext i32 (-1 + (-1 * %arg) + (10 smax (1 + %arg))) to i64)) + (4 * (sext i32 %arg to i64)) + %array) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(-8 + (4 * (sext i32 %arg to i64)) + %array),+,4}<%for.body> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-8 + (4 * (zext i32 (-1 + (-1 * %arg) + (10 smax (1 + %arg))) to i64)) + (4 * (sext i32 %arg to i64)) + %array) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %data = load i32, i32* %arrayidx, align 4 ; CHECK-NEXT: --> %data U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Variant } ; CHECK-NEXT: %inc5 = add nsw i32 %index, 1 -; CHECK-NEXT: --> {(1 + %arg),+,1}<%for.body> U: full-set S: full-set Exits: (10 smax (1 + %arg)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(1 + %arg),+,1}<%for.body> U: full-set S: full-set Exits: (10 smax (1 + %arg)) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test4 -; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (-1 * %arg) + (10 smax (1 + %arg))) +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (-1 * %arg) + (10 smax (1 + %arg))) ; CHECK-NEXT: Loop %for.body: max backedge-taken count is -2147483638 -; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (-1 * %arg) + (10 smax (1 + %arg))) +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (-1 * %arg) + (10 smax (1 + %arg))) ; CHECK-NEXT: Predicates: ; CHECK: Loop %for.body: Trip multiple is 1 ; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll index dfc289a..0616277 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll @@ -102,9 +102,9 @@ for.end: ; preds = %for.body, %entry define void @ult_129_unknown_start(i8 %start) mustprogress { ; CHECK-LABEL: 'ult_129_unknown_start' ; CHECK-NEXT: Determining loop execution counts for: @ult_129_unknown_start -; CHECK-NEXT: Loop %for.body: backedge-taken count is (((127 + (-1 * (1 umin (127 + (-1 * %start) + (-128 umax (-127 + %start))))) + (-1 * %start) + (-128 umax (-127 + %start))) /u -127) + (1 umin (127 + (-1 * %start) + (-128 umax (-127 + %start))))) -; CHECK-NEXT: Loop %for.body: max backedge-taken count is 1 -; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (((127 + (-1 * (1 umin (127 + (-1 * %start) + (-128 umax (-127 + %start))))) + (-1 * %start) + (-128 umax (-127 + %start))) /u -127) + (1 umin (127 + (-1 * %start) + (-128 umax (-127 + %start))))) +; CHECK-NEXT: Loop %for.body: backedge-taken count is 0 +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 0 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 0 ; CHECK-NEXT: Predicates: ; CHECK: Loop %for.body: Trip multiple is 1 ; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-non-unit-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-non-unit-stride.ll index cdfb068..af35375 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-non-unit-stride.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-non-unit-stride.ll @@ -171,7 +171,7 @@ define void @test_postinc_sgt(i64 %lim) { ; CHECK-NEXT: %iv = phi i64 [ 0, %start ], [ %iv.inc2, %latch ] ; CHECK-NEXT: --> {0,+,-2}<%loop> U: [0,-1) S: [-9223372036854775808,1) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.inc = add nsw i64 %iv, -1 -; CHECK-NEXT: --> {-1,+,-2}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {-1,+,-2}<%loop> U: [-9223372036854775808,0) S: [-9223372036854775808,0) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.inc2 = add nsw i64 %iv, -2 ; CHECK-NEXT: --> {-2,+,-2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_postinc_sgt diff --git a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll index d90542b..7329ea8 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll @@ -385,9 +385,8 @@ define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) { ; CHECK: fshbb: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[C]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[B:%.*]], i32 [[C]]) +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: %cmp = icmp eq i32 %c, 0 diff --git a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll index 9c40869..66c29f2 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll @@ -380,9 +380,8 @@ define i32 @not_rotr_5(i32 %a, i32 %b) { ; CHECK: rotbb: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[TMP0:%.*]] = freeze i32 [[B]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[A:%.*]], i32 [[B]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[B]], i32 [[A:%.*]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: %cmp = icmp eq i32 %b, 0 diff --git a/llvm/test/Transforms/IndVarSimplify/ashr-expansion.ll b/llvm/test/Transforms/IndVarSimplify/ashr-expansion.ll index 644b117..be5371e 100644 --- a/llvm/test/Transforms/IndVarSimplify/ashr-expansion.ll +++ b/llvm/test/Transforms/IndVarSimplify/ashr-expansion.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define float @ashr_expansion_valid(i64 %x, float* %ptr) { ; CHECK-LABEL: @ashr_expansion_valid( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BOUND:%.*]] = ashr i64 [[X:%.*]], 4 +; CHECK-NEXT: [[BOUND:%.*]] = ashr exact i64 [[X:%.*]], 4 ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -49,7 +49,7 @@ define float @ashr_equivalent_expansion(i64 %x, float* %ptr) { ; CHECK-NEXT: [[DIV:%.*]] = udiv exact i64 [[ABS_X]], 16 ; CHECK-NEXT: [[T0:%.*]] = call i64 @llvm.smax.i64(i64 [[X]], i64 -1) ; CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.smin.i64(i64 [[T0]], i64 1) -; CHECK-NEXT: [[BOUND:%.*]] = mul i64 [[DIV]], [[T1]] +; CHECK-NEXT: [[BOUND:%.*]] = mul nsw i64 [[DIV]], [[T1]] ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -97,7 +97,7 @@ define float @no_ashr_due_to_missing_exact_udiv(i64 %x, float* %ptr) { ; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[ABS_X]], 16 ; CHECK-NEXT: [[T0:%.*]] = call i64 @llvm.smax.i64(i64 [[X]], i64 -1) ; CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.smin.i64(i64 [[T0]], i64 1) -; CHECK-NEXT: [[BOUND:%.*]] = mul i64 [[DIV]], [[T1]] +; CHECK-NEXT: [[BOUND:%.*]] = mul nsw i64 [[DIV]], [[T1]] ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -145,7 +145,7 @@ define float @no_ashr_due_to_different_ops(i64 %x, i64 %y, float* %ptr) { ; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[ABS_X]], 16 ; CHECK-NEXT: [[T0:%.*]] = call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -1) ; CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.smin.i64(i64 [[T0]], i64 1) -; CHECK-NEXT: [[BOUND:%.*]] = mul i64 [[DIV]], [[T1]] +; CHECK-NEXT: [[BOUND:%.*]] = mul nsw i64 [[DIV]], [[T1]] ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll index 6477023..b1ff97f 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll @@ -7,7 +7,7 @@ define void @ptriv_as2(i8 addrspace(2)* %base, i32 %n) nounwind { ; CHECK-LABEL: @ptriv_as2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_TRUNC:%.*]] = trunc i32 [[N:%.*]] to i8 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8 addrspace(2)* [[BASE:%.*]], i8 [[IDX_TRUNC]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(2)* [[BASE:%.*]], i8 [[IDX_TRUNC]] ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8 addrspace(2)* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: @@ -53,7 +53,7 @@ define void @ptriv_as3(i8 addrspace(3)* %base, i32 %n) nounwind { ; CHECK-LABEL: @ptriv_as3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_TRUNC:%.*]] = trunc i32 [[N:%.*]] to i16 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8 addrspace(3)* [[BASE:%.*]], i16 [[IDX_TRUNC]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[BASE:%.*]], i16 [[IDX_TRUNC]] ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8 addrspace(3)* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll index 96dd92d..5117a80 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -16,7 +16,7 @@ define void @ptriv(i8* %base, i32 %n) nounwind { ; CHECK-LABEL: @ptriv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[N:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: @@ -63,7 +63,7 @@ for.end: define void @expandOuterRecurrence(i32 %arg) nounwind { ; CHECK-LABEL: @expandOuterRecurrence( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: outer.preheader: diff --git a/llvm/test/Transforms/IndVarSimplify/pr24783.ll b/llvm/test/Transforms/IndVarSimplify/pr24783.ll index 9d60f9c..83b4a53 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr24783.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr24783.ll @@ -7,7 +7,7 @@ target triple = "powerpc64-unknown-linux-gnu" define void @f(i32* %end.s, i8** %loc, i32 %p) { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[END:%.*]] = getelementptr i32, i32* [[END_S:%.*]], i32 [[P:%.*]] +; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds i32, i32* [[END_S:%.*]], i32 [[P:%.*]] ; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]] ; CHECK: while.body.i: ; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[WHILE_BODY_I]] diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll index a10d432..d7c3e3a 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll @@ -17,7 +17,7 @@ define i64 @blam(%struct.hoge* %start, %struct.hoge* %end, %struct.hoge* %ptr.2) ; CHECK-NEXT: [[LSR_IV5:%.*]] = phi i64 [ [[LSR_IV_NEXT6:%.*]], [[LOOP_1_HEADER]] ], [ [[START1]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV:%.*]] = phi %struct.hoge* [ [[IV_NEXT:%.*]], [[LOOP_1_HEADER]] ], [ [[START]], [[ENTRY]] ] ; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[IV]], i64 1 -; CHECK-NEXT: [[LSR_IV_NEXT6]] = add i64 [[LSR_IV5]], 16 +; CHECK-NEXT: [[LSR_IV_NEXT6]] = add nuw i64 [[LSR_IV5]], 16 ; CHECK-NEXT: [[EC:%.*]] = icmp eq %struct.hoge* [[IV_NEXT]], [[END:%.*]] ; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2_PH:%.*]], label [[LOOP_1_HEADER]] ; CHECK: loop.2.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index 6197bcd..f66939e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -11,7 +11,7 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_no_scalarization(i64* %a, i32 %idx, i32 %n) #0 { ; CHECK-LABEL: @test_no_scalarization( ; CHECK-NEXT: L.entry: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IDX:%.*]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[IDX:%.*]], 1 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[IDX]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll index 53b78be..4d5d87d 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll @@ -10,7 +10,6 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) { ; CHECK-LABEL: @sum_2_at_with_int_conversion( ; CHECK-NEXT: at_with_int_conversion.exit12.peel: -; CHECK-NEXT: [[N_FR:%.*]] = freeze i64 [[N:%.*]] ; CHECK-NEXT: [[GEP_START_I:%.*]] = getelementptr [[VEC:%.*]], %vec* [[A:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[START_I:%.*]] = load i64*, i64** [[GEP_START_I]], align 8 ; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1 @@ -23,60 +22,60 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) { ; CHECK-NEXT: [[START_I2_PEEL:%.*]] = load i64*, i64** [[GEP_START_I1]], align 8 ; CHECK-NEXT: [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8 ; CHECK-NEXT: [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64 -; CHECK-NEXT: [[END_I4_PEEL_FR:%.*]] = freeze i64* [[END_I4_PEEL]] -; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL_FR]] to i64 +; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL]] to i64 ; CHECK-NEXT: [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]] ; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, i64* [[START_I]], align 4 ; CHECK-NEXT: [[LV_I10_PEEL:%.*]] = load i64, i64* [[START_I2_PEEL]], align 4 ; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I10_PEEL]] -; CHECK-NEXT: [[C_PEEL:%.*]] = icmp sgt i64 [[N_FR]], 0 +; CHECK-NEXT: [[C_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[C_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N_FR]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[SUB_I7_PEEL]]) -; CHECK-NEXT: [[UMIN16:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I]]) -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMIN16]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB_I7_PEEL]], i64 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[UMIN]] +; CHECK-NEXT: [[UMIN16:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[SUB_I]]) +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[UMIN16]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 5 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER21:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> , i64 [[SUM_NEXT_PEEL]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> , i64 [[SUM_NEXT_PEEL]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[WIDE_LOAD18]], [[VEC_PHI17]] -; CHECK-NEXT: [[TMP15]] = add <2 x i64> [[TMP13]], [[WIDE_LOAD19]] -; CHECK-NEXT: [[TMP16]] = add <2 x i64> [[TMP14]], [[WIDE_LOAD20]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[TMP6]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i64> [[WIDE_LOAD18]], [[VEC_PHI17]] +; CHECK-NEXT: [[TMP16]] = add <2 x i64> [[TMP14]], [[WIDE_LOAD19]] +; CHECK-NEXT: [[TMP17]] = add <2 x i64> [[TMP15]], [[WIDE_LOAD20]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP16]], [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) ; CHECK-NEXT: br label [[LOOP_PREHEADER21]] ; CHECK: loop.preheader21: ; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER21]] ] @@ -100,7 +99,7 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) { ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[LV_I]], [[SUM]] ; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[ADD]], [[LV_I10]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[IV]], [[N_FR]] +; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[IV]], [[N]] ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[AT_WITH_INT_CONVERSION_EXIT12_PEEL:%.*]] ], [ [[SUM_NEXT]], [[AT_WITH_INT_CONVERSION_EXIT12]] ] @@ -127,7 +126,6 @@ exit: define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) { ; CHECK-LABEL: @sum_3_at_with_int_conversion( ; CHECK-NEXT: at_with_int_conversion.exit24.peel: -; CHECK-NEXT: [[N_FR:%.*]] = freeze i64 [[N:%.*]] ; CHECK-NEXT: [[GEP_START_I:%.*]] = getelementptr [[VEC:%.*]], %vec* [[A:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[START_I:%.*]] = load i64*, i64** [[GEP_START_I]], align 8 ; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1 @@ -149,70 +147,70 @@ define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) { ; CHECK-NEXT: [[START_I14_PEEL:%.*]] = load i64*, i64** [[GEP_START_I13]], align 8 ; CHECK-NEXT: [[END_I16_PEEL:%.*]] = load i64*, i64** [[GEP_END_I15]], align 8 ; CHECK-NEXT: [[START_INT_I17_PEEL:%.*]] = ptrtoint i64* [[START_I14_PEEL]] to i64 -; CHECK-NEXT: [[END_I16_PEEL_FR:%.*]] = freeze i64* [[END_I16_PEEL]] -; CHECK-NEXT: [[END_INT_I18_PEEL:%.*]] = ptrtoint i64* [[END_I16_PEEL_FR]] to i64 +; CHECK-NEXT: [[END_INT_I18_PEEL:%.*]] = ptrtoint i64* [[END_I16_PEEL]] to i64 ; CHECK-NEXT: [[SUB_I19_PEEL:%.*]] = sub i64 [[END_INT_I18_PEEL]], [[START_INT_I17_PEEL]] ; CHECK-NEXT: [[LV_I10_PEEL:%.*]] = load i64, i64* [[START_I2_PEEL]], align 4 ; CHECK-NEXT: [[LV_I22_PEEL:%.*]] = load i64, i64* [[START_I14_PEEL]], align 4 ; CHECK-NEXT: [[ADD_2_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I10_PEEL]] ; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[ADD_2_PEEL]], [[LV_I22_PEEL]] -; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp sgt i64 [[N_FR]], 0 +; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[COND_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N_FR]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[SUB_I19_PEEL]]) -; CHECK-NEXT: [[UMIN28:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I7_PEEL]]) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB_I19_PEEL]], i64 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[UMIN]] +; CHECK-NEXT: [[UMIN28:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[SUB_I7_PEEL]]) ; CHECK-NEXT: [[UMIN29:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN28]], i64 [[SUB_I]]) -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMIN29]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 5 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[UMIN29]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 5 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER36:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> , i64 [[SUM_NEXT_PEEL]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> , i64 [[SUM_NEXT_PEEL]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI30:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI30:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, i64* [[START_I14_PEEL]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64* [[TMP13]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i64>, <2 x i64>* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, i64* [[TMP13]], i64 2 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64* [[TMP15]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <2 x i64>, <2 x i64>* [[TMP16]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[WIDE_LOAD31]], [[VEC_PHI30]] -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[TMP17]], [[WIDE_LOAD32]] -; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i64> [[TMP18]], [[WIDE_LOAD33]] -; CHECK-NEXT: [[TMP21]] = add <2 x i64> [[TMP19]], [[WIDE_LOAD34]] -; CHECK-NEXT: [[TMP22]] = add <2 x i64> [[TMP20]], [[WIDE_LOAD35]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[TMP6]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, i64* [[START_I14_PEEL]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64* [[TMP14]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, i64* [[TMP14]], i64 2 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[WIDE_LOAD31]], [[VEC_PHI30]] +; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i64> [[TMP18]], [[WIDE_LOAD32]] +; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i64> [[TMP19]], [[WIDE_LOAD33]] +; CHECK-NEXT: [[TMP22]] = add <2 x i64> [[TMP20]], [[WIDE_LOAD34]] +; CHECK-NEXT: [[TMP23]] = add <2 x i64> [[TMP21]], [[WIDE_LOAD35]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP22]], [[TMP21]] -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP23]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) ; CHECK-NEXT: br label [[LOOP_PREHEADER36]] ; CHECK: loop.preheader36: ; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER36]] ] @@ -245,7 +243,7 @@ define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) { ; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[ADD_1]], [[LV_I10]] ; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[ADD_2]], [[LV_I22]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[IV]], [[N_FR]] +; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[IV]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[AT_WITH_INT_CONVERSION_EXIT24_PEEL:%.*]] ], [ [[SUM_NEXT]], [[AT_WITH_INT_CONVERSION_EXIT24]] ] diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll index 981331d..d2be11a 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll @@ -678,21 +678,15 @@ define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_ } ; Test case from PR51992. -; TODO: could handle by inserting freeze. -define i8 @load_extract_safe_with_freeze(<8 x i8> %in, <16 x i8>* %src) { -; CHECK-LABEL: @load_extract_safe_with_freeze( +define i8 @load_extract_safe_due_to_branch_on_poison(<8 x i8> %in, <16 x i8>* %src) { +; CHECK-LABEL: @load_extract_safe_due_to_branch_on_poison( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXT_IDX:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 0 -; CHECK-NEXT: [[EXT_IDX_I32:%.*]] = zext i8 [[EXT_IDX]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[EXT_IDX]], 99 ; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[EXIT:%.*]] ; CHECK: then: -; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[SRC:%.*]], align 16 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[EXT_IDX_I32]], 15 -; CHECK-NEXT: [[EXT:%.*]] = extractelement <16 x i8> [[LOAD]], i32 [[AND]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[P:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[EXT]], [[THEN]] ] ; CHECK-NEXT: ret i8 0 ; entry: diff --git a/polly/test/DeLICM/pr41656.ll b/polly/test/DeLICM/pr41656.ll index e3e45ee..61f1a6f 100644 --- a/polly/test/DeLICM/pr41656.ll +++ b/polly/test/DeLICM/pr41656.ll @@ -82,7 +82,7 @@ attributes #2 = { nounwind } ; CHECK: Invalid Context: -; CHECK-NEXT: [call24] -> { : call24 <= 2 } +; CHECK-NEXT: [call24] -> { : false } ; CHECK: Defined Behavior Context: ; CHECK-NEXT: [call24] -> { : 3 <= call24 <= 2147483647 }