From df9c5bd8d2e6a060b342aa4ac46a9c3313ba2282 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 20 Feb 2023 12:39:33 +0700 Subject: [PATCH] [SCEV] Support umin/smin in SCEVLoopGuardRewriter Adds support for these SCEVs to cover more cases. Differential Revision: https://reviews.llvm.org/D143259 Reviewed By: dmakogon, fhahn --- llvm/lib/Analysis/ScalarEvolution.cpp | 14 ++ ...e-taken-count-guard-info-rewrite-expressions.ll | 36 ++--- .../Analysis/ScalarEvolution/trip-count-minmax.ll | 28 ++-- llvm/test/CodeGen/Thumb2/mve-blockplacement.ll | 179 +++++++++++---------- 4 files changed, 137 insertions(+), 120 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 36cb779..de4d934 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -14965,6 +14965,20 @@ public: Expr); return I->second; } + + const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) { + auto I = Map.find(Expr); + if (I == Map.end()) + return SCEVRewriteVisitor::visitUMinExpr(Expr); + return I->second; + } + + const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) { + auto I = Map.find(Expr); + if (I == Map.end()) + return SCEVRewriteVisitor::visitSMinExpr(Expr); + return I->second; + } }; const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll index 5d7548d..b7792d9 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll @@ -90,7 +90,7 @@ exit: ret i32 0 } -; TODO: Same as rewrite_zext_min_max, but the loop is guarded by narrow check. +; Same as rewrite_zext_min_max, but the loop is guarded by narrow check. define i32 @rewrite_zext_min_max_narrow_check(i32 %N, ptr %arr) { ; CHECK-LABEL: 'rewrite_zext_min_max_narrow_check' ; CHECK-NEXT: Classifying expressions for: @rewrite_zext_min_max_narrow_check @@ -101,14 +101,14 @@ define i32 @rewrite_zext_min_max_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * ((zext i32 (16 umin %N) to i64) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index -; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 + (4 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_min_max_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 4611686018427387903 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 3 ; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))) /u 4) ; CHECK-NEXT: Predicates: @@ -137,7 +137,7 @@ exit: ret i32 0 } -; TODO: This is same as rewrite_zext_min_max, but zext and umin are swapped. +; This is same as rewrite_zext_min_max, but zext and umin are swapped. ; It should be able to prove the same exit count. define i32 @rewrite_min_max_zext(i32 %N, ptr %arr) { ; CHECK-LABEL: 'rewrite_min_max_zext' @@ -149,14 +149,14 @@ define i32 @rewrite_min_max_zext(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %umin, 28 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index -; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_zext ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 4611686018427387903 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 3 ; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Predicates: @@ -233,7 +233,7 @@ exit: ret i32 0 } -; TODO: same as rewrite_sext_min_max, but the loop is guarded by narrow check. +; same as rewrite_sext_min_max, but the loop is guarded by narrow check. ; It should be able to prove the same exit count. define i32 @rewrite_sext_min_max_narrow_check(i32 %N, ptr %arr) { ; CHECK-LABEL: 'rewrite_sext_min_max_narrow_check' @@ -245,14 +245,14 @@ define i32 @rewrite_sext_min_max_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,-9223372036854775808) S: [0,9223372036854775805) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nsw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,-9223372036854775808) S: [4,9223372036854775805) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_min_max_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))) /u 4) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 4611686018427387903 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 3 ; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Predicates: @@ -281,7 +281,7 @@ exit: ret i32 0 } -; TODO: This is a signed version of rewrite_min_max_zext. +; This is a signed version of rewrite_min_max_zext. ; It should be able to prove the same exit count. define i32 @rewrite_min_max_sext(i32 %N, ptr %arr) { ; CHECK-LABEL: 'rewrite_min_max_sext' @@ -293,14 +293,14 @@ define i32 @rewrite_min_max_sext(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %smin, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,-9223372036854775808) S: [0,9223372036854775805) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nsw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,-9223372036854775808) S: [4,9223372036854775805) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_sext ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 4611686018427387903 +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 3 ; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Predicates: diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll index 115f1cb..431e6043 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll @@ -10,12 +10,12 @@ define void @nomulitply(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: %cond = select i1 %cmp, i32 %a, i32 %b ; CHECK-NEXT: --> (%a umin %b) U: full-set S: full-set ; CHECK-NEXT: %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + (%a umin %b)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + (%a umin %b)) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %inc = add nuw nsw i32 %i.08, 1 -; CHECK-NEXT: --> {1,+,1}<%for.body> U: [1,0) S: [1,0) Exits: (%a umin %b) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {1,+,1}<%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: (%a umin %b) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @nomulitply ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (%a umin %b)) -; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is -2 +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + (%a umin %b)) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (%a umin %b)) ; CHECK-NEXT: Predicates: @@ -56,16 +56,16 @@ define void @umin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1 ; CHECK-NEXT: --> ((2 * %a) umin (4 * %b)) U: [0,-3) S: [-2147483648,2147483647) ; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) umin (4 * %b))) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) umin (4 * %b))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1 -; CHECK-NEXT: --> {1,+,1}<%for.body> U: [1,-3) S: [1,-3) Exits: ((2 * %a) umin (4 * %b)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {1,+,1}<%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) umin (4 * %b)) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @umin ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) -; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is -5 +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 2 +; CHECK: Loop %for.body: Trip multiple is 1 ; ; void umin(unsigned a, unsigned b) { ; a *= 2; @@ -156,16 +156,16 @@ define void @smin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1 ; CHECK-NEXT: --> ((2 * %a) smin (4 * %b)) U: [0,-1) S: [-2147483648,2147483645) ; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) smin (4 * %b))) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) smin (4 * %b))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1 -; CHECK-NEXT: --> {1,+,1}<%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a) smin (4 * %b)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {1,+,1}<%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) smin (4 * %b)) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @smin ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) smin (4 * %b))) -; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is -3 +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) smin (4 * %b))) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + ((2 * %a) smin (4 * %b))) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 2 +; CHECK: Loop %for.body: Trip multiple is 1 ; ; void smin(signed a, signed b) { ; a *= 2; @@ -292,12 +292,12 @@ define void @umin-3and6(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1 ; CHECK-NEXT: --> ((3 * %a) umin (6 * %b)) U: [0,-1) S: full-set ; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((3 * %a) umin (6 * %b))) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((3 * %a) umin (6 * %b))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1 -; CHECK-NEXT: --> {1,+,1}<%for.body> U: [1,-1) S: [1,-1) Exits: ((3 * %a) umin (6 * %b)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {1,+,1}<%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((3 * %a) umin (6 * %b)) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @umin-3and6 ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((3 * %a) umin (6 * %b))) -; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is -3 +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((3 * %a) umin (6 * %b))) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + ((3 * %a) umin (6 * %b))) ; CHECK-NEXT: Predicates: diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index 879d110..2d7126d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -335,10 +335,10 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: subs r1, r0, #1 ; CHECK-NEXT: sbcs r1, r12, #0 @@ -346,54 +346,50 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: @ %bb.1: @ %for.cond2.preheader.lr.ph ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: csel r7, r2, r3, lt -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: cmp r7, #3 +; CHECK-NEXT: csel lr, r2, r3, lt +; CHECK-NEXT: movw r4, #43691 +; CHECK-NEXT: mov r1, lr +; CHECK-NEXT: cmp.w lr, #3 ; CHECK-NEXT: it ls ; CHECK-NEXT: movls r1, #3 -; CHECK-NEXT: movw r2, #43691 -; CHECK-NEXT: subs r1, r1, r7 -; CHECK-NEXT: movt r2, #43690 +; CHECK-NEXT: movt r4, #43690 +; CHECK-NEXT: sub.w r1, r1, lr +; CHECK-NEXT: ldr r6, [sp, #128] ; CHECK-NEXT: adds r1, #2 -; CHECK-NEXT: ldr r4, [sp, #120] -; CHECK-NEXT: movw r11, :lower16:c -; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: umull r1, r2, r1, r2 -; CHECK-NEXT: movt r11, :upper16:c +; CHECK-NEXT: movw r8, :lower16:c +; CHECK-NEXT: movt r8, :upper16:c +; CHECK-NEXT: mov.w r9, #12 +; CHECK-NEXT: umull r1, r4, r1, r4 +; CHECK-NEXT: @ implicit-def: $r10 +; CHECK-NEXT: @ implicit-def: $r5 +; CHECK-NEXT: @ implicit-def: $r11 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: movs r1, #4 -; CHECK-NEXT: @ implicit-def: $r8 -; CHECK-NEXT: @ implicit-def: $r9 -; CHECK-NEXT: movs r5, #12 -; CHECK-NEXT: strd r12, r0, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: add.w r6, r3, r2, lsr #1 -; CHECK-NEXT: add.w r1, r1, r2, lsr #1 -; CHECK-NEXT: movw r2, #65532 -; CHECK-NEXT: vdup.32 q6, r6 -; CHECK-NEXT: movt r2, #32767 -; CHECK-NEXT: ands r1, r2 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: subs r1, #4 -; CHECK-NEXT: add.w r1, r3, r1, lsr #2 -; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: strd r2, r12, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: add.w r3, r3, r4, lsr #1 +; CHECK-NEXT: add.w r1, r1, r4, lsr #1 +; CHECK-NEXT: movw r4, #65532 +; CHECK-NEXT: vdup.32 q6, r3 +; CHECK-NEXT: movt r4, #32767 +; CHECK-NEXT: and.w r7, r1, r4 ; CHECK-NEXT: adr r1, .LCPI1_0 +; CHECK-NEXT: vdup.32 q7, r3 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: adr r1, .LCPI1_1 ; CHECK-NEXT: vldrw.u32 q5, [r1] -; CHECK-NEXT: vadd.i32 q4, q0, r7 -; CHECK-NEXT: @ implicit-def: $r7 +; CHECK-NEXT: vadd.i32 q4, q0, lr ; CHECK-NEXT: b .LBB1_4 ; CHECK-NEXT: .LBB1_2: @ %for.body6.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: cmn.w r9, #4 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: cmn.w r11, #4 ; CHECK-NEXT: it le ; CHECK-NEXT: mvnle r0, #3 ; CHECK-NEXT: movw r2, #18725 ; CHECK-NEXT: adds r0, #6 ; CHECK-NEXT: movt r2, #9362 -; CHECK-NEXT: sub.w r1, r0, r9 -; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: sub.w r1, r0, r11 +; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: umull r2, r3, r1, r2 ; CHECK-NEXT: subs r2, r1, r3 ; CHECK-NEXT: add.w r2, r3, r2, lsr #1 @@ -402,14 +398,14 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: sub.w r2, r3, r2, lsr #2 ; CHECK-NEXT: subs r1, r2, r1 ; CHECK-NEXT: add r0, r1 -; CHECK-NEXT: add.w r9, r0, #7 -; CHECK-NEXT: ldrd r12, r0, [sp, #4] @ 8-byte Folded Reload +; CHECK-NEXT: add.w r11, r0, #7 +; CHECK-NEXT: ldrd r12, r0, [sp, #8] @ 8-byte Folded Reload ; CHECK-NEXT: .LBB1_3: @ %for.cond.cleanup5 ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: add.w r8, r8, #2 -; CHECK-NEXT: subs.w r1, r8, r0 -; CHECK-NEXT: asr.w r2, r8, #31 -; CHECK-NEXT: sbcs.w r1, r2, r12 +; CHECK-NEXT: adds r5, #2 +; CHECK-NEXT: subs r1, r5, r0 +; CHECK-NEXT: asr.w r3, r5, #31 +; CHECK-NEXT: sbcs.w r1, r3, r12 ; CHECK-NEXT: bge.w .LBB1_28 ; CHECK-NEXT: .LBB1_4: @ %for.cond2.preheader ; CHECK-NEXT: @ =>This Loop Header: Depth=1 @@ -417,29 +413,33 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: @ Child Loop BB1_8 Depth 2 ; CHECK-NEXT: @ Child Loop BB1_10 Depth 3 ; CHECK-NEXT: @ Child Loop BB1_12 Depth 3 -; CHECK-NEXT: cmp.w r9, #2 +; CHECK-NEXT: cmp.w r11, #2 ; CHECK-NEXT: bgt .LBB1_3 ; CHECK-NEXT: @ %bb.5: @ %for.body6.lr.ph ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #5 +; CHECK-NEXT: cmp.w lr, #5 ; CHECK-NEXT: bhi .LBB1_15 ; CHECK-NEXT: @ %bb.6: @ %for.body6.us.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: ldrd r2, r3, [sp, #112] +; CHECK-NEXT: ldrd r2, r3, [sp, #120] ; CHECK-NEXT: movs r0, #32 ; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: mov r4, r7 +; CHECK-NEXT: mov r7, lr ; CHECK-NEXT: bl __aeabi_ldivmod -; CHECK-NEXT: ldrd r12, r0, [sp, #4] @ 8-byte Folded Reload ; CHECK-NEXT: vdup.32 q0, r2 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldrd r2, r12, [sp, #4] @ 8-byte Folded Reload +; CHECK-NEXT: mov lr, r7 +; CHECK-NEXT: mov r7, r4 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: b .LBB1_8 ; CHECK-NEXT: .LBB1_7: @ %for.cond.cleanup17.us ; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2 -; CHECK-NEXT: add.w r9, r3, #7 +; CHECK-NEXT: add.w r11, r3, #7 ; CHECK-NEXT: cmn.w r3, #4 -; CHECK-NEXT: mov.w r7, #0 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: bge .LBB1_3 ; CHECK-NEXT: .LBB1_8: @ %for.body6.us ; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1 @@ -447,103 +447,106 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: @ Child Loop BB1_10 Depth 3 ; CHECK-NEXT: @ Child Loop BB1_12 Depth 3 ; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: beq .LBB1_11 +; CHECK-NEXT: cbz r2, .LBB1_11 ; CHECK-NEXT: @ %bb.9: @ %for.body13.us51.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2 -; CHECK-NEXT: movw r2, :lower16:a +; CHECK-NEXT: movw r4, :lower16:a ; CHECK-NEXT: vmov q1, q4 -; CHECK-NEXT: movt r2, :upper16:a -; CHECK-NEXT: str r1, [r2] -; CHECK-NEXT: movw r2, :lower16:b -; CHECK-NEXT: movt r2, :upper16:b -; CHECK-NEXT: str r1, [r2] -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: dlstp.32 lr, r6 +; CHECK-NEXT: movt r4, :upper16:a +; CHECK-NEXT: str r1, [r4] +; CHECK-NEXT: movw r4, :lower16:b +; CHECK-NEXT: movt r4, :upper16:b +; CHECK-NEXT: str r1, [r4] +; CHECK-NEXT: mov r4, r7 ; CHECK-NEXT: .LBB1_10: @ %vector.body111 ; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1 ; CHECK-NEXT: @ Parent Loop BB1_8 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 +; CHECK-NEXT: vqadd.u32 q2, q5, r1 +; CHECK-NEXT: subs r4, #4 +; CHECK-NEXT: vcmp.u32 hi, q7, q2 ; CHECK-NEXT: vshl.i32 q2, q1, #2 -; CHECK-NEXT: vadd.i32 q2, q2, r11 -; CHECK-NEXT: vadd.i32 q1, q1, r5 -; CHECK-NEXT: vstrw.32 q0, [q2] -; CHECK-NEXT: letp lr, .LBB1_10 +; CHECK-NEXT: add.w r1, r1, #4 +; CHECK-NEXT: vadd.i32 q2, q2, r8 +; CHECK-NEXT: vadd.i32 q1, q1, r9 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrwt.32 q0, [q2] +; CHECK-NEXT: bne .LBB1_10 ; CHECK-NEXT: b .LBB1_13 ; CHECK-NEXT: .LBB1_11: @ %vector.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2 -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r4, r7 ; CHECK-NEXT: vmov q1, q4 ; CHECK-NEXT: .LBB1_12: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1 ; CHECK-NEXT: @ Parent Loop BB1_8 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: vqadd.u32 q2, q5, r1 -; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: subs r4, #4 ; CHECK-NEXT: vcmp.u32 hi, q6, q2 ; CHECK-NEXT: vshl.i32 q2, q1, #2 ; CHECK-NEXT: add.w r1, r1, #4 -; CHECK-NEXT: vadd.i32 q2, q2, r11 -; CHECK-NEXT: vadd.i32 q1, q1, r5 +; CHECK-NEXT: vadd.i32 q2, q2, r8 +; CHECK-NEXT: vadd.i32 q1, q1, r9 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [q2] ; CHECK-NEXT: bne .LBB1_12 ; CHECK-NEXT: .LBB1_13: @ %for.cond9.for.cond15.preheader_crit_edge.us ; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: beq .LBB1_7 ; CHECK-NEXT: @ %bb.14: @ %for.cond9.for.cond15.preheader_crit_edge.us ; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2 -; CHECK-NEXT: eor r1, r7, #1 +; CHECK-NEXT: eor r1, r10, #1 ; CHECK-NEXT: lsls r1, r1, #31 ; CHECK-NEXT: bne .LBB1_7 ; CHECK-NEXT: b .LBB1_26 ; CHECK-NEXT: .LBB1_15: @ %for.body6.lr.ph.split ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: beq.w .LBB1_2 ; CHECK-NEXT: @ %bb.16: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: ldrd r12, r0, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: ldrd r12, r0, [sp, #8] @ 8-byte Folded Reload +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: .LBB1_17: @ %for.body6.us60 ; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lsls r1, r7, #31 +; CHECK-NEXT: lsls.w r1, r10, #31 ; CHECK-NEXT: bne .LBB1_27 ; CHECK-NEXT: @ %bb.18: @ %for.cond.cleanup17.us63 ; CHECK-NEXT: @ in Loop: Header=BB1_17 Depth=2 -; CHECK-NEXT: cmn.w r2, #4 +; CHECK-NEXT: cmn.w r3, #4 ; CHECK-NEXT: bge .LBB1_22 ; CHECK-NEXT: @ %bb.19: @ %for.cond.cleanup17.us63.1 ; CHECK-NEXT: @ in Loop: Header=BB1_17 Depth=2 -; CHECK-NEXT: cmn.w r2, #12 +; CHECK-NEXT: cmn.w r3, #12 ; CHECK-NEXT: bgt .LBB1_23 ; CHECK-NEXT: @ %bb.20: @ %for.cond.cleanup17.us63.2 ; CHECK-NEXT: @ in Loop: Header=BB1_17 Depth=2 -; CHECK-NEXT: cmn.w r2, #19 +; CHECK-NEXT: cmn.w r3, #19 ; CHECK-NEXT: bgt .LBB1_24 ; CHECK-NEXT: @ %bb.21: @ %for.cond.cleanup17.us63.3 ; CHECK-NEXT: @ in Loop: Header=BB1_17 Depth=2 -; CHECK-NEXT: add.w r9, r2, #28 -; CHECK-NEXT: cmn.w r2, #25 -; CHECK-NEXT: mov.w r7, #0 -; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: add.w r11, r3, #28 +; CHECK-NEXT: cmn.w r3, #25 +; CHECK-NEXT: mov.w r10, #0 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: blt .LBB1_17 ; CHECK-NEXT: b .LBB1_3 ; CHECK-NEXT: .LBB1_22: @ %for.cond.cleanup5.loopexit134.split.loop.exit139 ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: add.w r9, r2, #7 +; CHECK-NEXT: add.w r11, r3, #7 ; CHECK-NEXT: b .LBB1_25 ; CHECK-NEXT: .LBB1_23: @ %for.cond.cleanup5.loopexit134.split.loop.exit137 ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: add.w r9, r2, #14 +; CHECK-NEXT: add.w r11, r3, #14 ; CHECK-NEXT: b .LBB1_25 ; CHECK-NEXT: .LBB1_24: @ %for.cond.cleanup5.loopexit134.split.loop.exit135 ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: add.w r9, r2, #21 +; CHECK-NEXT: add.w r11, r3, #21 ; CHECK-NEXT: .LBB1_25: @ %for.cond.cleanup5 ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: b .LBB1_3 ; CHECK-NEXT: .LBB1_26: @ %for.inc19.us ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -552,8 +555,8 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: b .LBB1_27 ; CHECK-NEXT: .LBB1_28: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #24 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 -- 2.7.4