From: Roman Lebedev Date: Sat, 17 Oct 2020 18:04:26 +0000 (+0300) Subject: [SCEV] Model `ashr exact x, C` as `(abs(x) EXACT/u (1<<C)) * signum(x)` [remainder of the commit message and the beginning of the ScalarEvolution.h hunk were lost in extraction] &IndexExprs); const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW); + const SCEV *getSignumExpr(const SCEV *Op); const SCEV *getMinMaxExpr(unsigned Kind, SmallVectorImpl<const SCEV *> &Operands); const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index f3e152b..1845ca9 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3339,6 +3339,11 @@ const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) { return getSMaxExpr(Op, getNegativeSCEV(Op, Flags)); } +const SCEV *ScalarEvolution::getSignumExpr(const SCEV *Op) { + Type *Ty = Op->getType(); + return getSMinExpr(getSMaxExpr(Op, getMinusOne(Ty)), getOne(Ty)); +} + const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind, SmallVectorImpl<const SCEV *> &Ops) { assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); @@ -4273,6 +4278,7 @@ struct BinaryOp { Value *RHS; bool IsNSW = false; bool IsNUW = false; + bool IsExact = false; /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or /// constant expression. 
@@ -4285,11 +4291,14 @@ struct BinaryOp { IsNSW = OBO->hasNoSignedWrap(); IsNUW = OBO->hasNoUnsignedWrap(); } + if (auto *PEO = dyn_cast<PossiblyExactOperator>(Op)) + IsExact = PEO->isExact(); } explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false, - bool IsNUW = false) - : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {} + bool IsNUW = false, bool IsExact = false) + : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW), + IsExact(IsExact) {} }; } // end anonymous namespace @@ -6267,6 +6276,15 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { } } } + if (BO->IsExact) { + // Given exact arithmetic in-bounds right-shift by a constant, + // we can lower it into: (abs(x) EXACT/u (1<<C)) * signum(x) + const SCEV *X = getSCEV(BO->LHS); + const SCEV *AbsX = getAbsExpr(X, /*IsNSW=*/false); + APInt Mult = APInt::getOneBitSet(BitWidth, AShrAmt); + const SCEV *Div = getUDivExactExpr(AbsX, getConstant(Mult)); + return getMulExpr(Div, getSignumExpr(X), SCEV::FlagNSW); + } break; } } diff --git a/llvm/test/Analysis/ScalarEvolution/ashr.ll b/llvm/test/Analysis/ScalarEvolution/ashr.ll index 15e1746..cf54c3e 100644 --- a/llvm/test/Analysis/ScalarEvolution/ashr.ll +++ b/llvm/test/Analysis/ScalarEvolution/ashr.ll @@ -42,7 +42,7 @@ define i32 @t3(i32 %x, i32 %y) { ; ALL-LABEL: 't3' ; ALL-NEXT: Classifying expressions for: @t3 ; ALL-NEXT: %i0 = ashr exact i32 %x, 4 -; ALL-NEXT: --> %i0 U: full-set S: [-134217728,134217728) +; ALL-NEXT: --> ((((-1 * %x) smax %x) /u 16) * (1 smin (-1 smax %x))) U: [-268435455,268435456) S: [-268435455,268435456) ; ALL-NEXT: Determining loop execution counts for: @t3 ; %i0 = ashr exact i32 %x, 4 @@ -65,7 +65,7 @@ define i32 @t5(i32 %x, i32 %y) { ; ALL-LABEL: 't5' ; ALL-NEXT: Classifying expressions for: @t5 ; ALL-NEXT: %i0 = ashr exact i32 %x, 5 -; ALL-NEXT: --> %i0 U: full-set S: [-67108864,67108864) +; ALL-NEXT: --> ((((-1 * %x) smax %x) /u 32) * (1 smin (-1 smax %x))) U: [-134217727,134217728) S: [-134217727,134217728) ; ALL-NEXT: Determining loop execution counts 
for: @t5 ; %i0 = ashr exact i32 %x, 5 diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll index 3e55634..1d32c59 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -325,6 +325,8 @@ bb5: ; for (int* cur = start; cur != end; ++cur) ; other[cur - start] += *cur; ; } +; +; FIXME: 4 * (%i10 EXACT/s 4) is just %i10 define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X64-LABEL: 'pr46786_c26_int' ; X64-NEXT: Classifying expressions for: @pr46786_c26_int @@ -339,9 +341,9 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X64-NEXT: %i10 = sub i64 %i9, %i4 ; X64-NEXT: --> ((-1 * %i4) + %i9) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i11 = ashr exact i64 %i10, 2 -; X64-NEXT: --> %i11 U: full-set S: [-2305843009213693952,2305843009213693952) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X64-NEXT: --> (((((-1 * %i4) + %i9) smax ((-1 * %i9) + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4) + %i9)))) U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11 -; X64-NEXT: --> ((4 * %i11) + %arg2) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X64-NEXT: --> ((4 * ((((-1 * %i4) + %i9) smax ((-1 * %i9) + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4) + %i9)))) + %arg2) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i13 = load i32, i32* %i12, align 4 ; X64-NEXT: --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i14 = add nsw i32 %i13, %i8 @@ -368,9 +370,9 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) { ; X32-NEXT: %i10 = sub i64 %i9, %i4 ; X32-NEXT: --> ((-1 * %i4) + %i9) U: [-4294967295,4294967296) S: [-8589934591,8589934592) 
Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i11 = ashr exact i64 %i10, 2 -; X32-NEXT: --> %i11 U: full-set S: [-2147483648,2147483648) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X32-NEXT: --> (((((-1 * %i4) + %i9) smax ((-1 * %i9) + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4) + %i9)))) U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11 -; X32-NEXT: --> ((4 * (trunc i64 %i11 to i32)) + %arg2) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } +; X32-NEXT: --> ((4 * (trunc i64 (((((-1 * %i4) + %i9) smax ((-1 * %i9) + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4) + %i9)))) to i32)) + %arg2) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i13 = load i32, i32* %i12, align 4 ; X32-NEXT: --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i14 = add nsw i32 %i13, %i8