const SCEV *getGEPExpr(GEPOperator *GEP,
const SmallVectorImpl<const SCEV *> &IndexExprs);
const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
+ const SCEV *getSignumExpr(const SCEV *Op);
const SCEV *getMinMaxExpr(unsigned Kind,
SmallVectorImpl<const SCEV *> &Operands);
const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
return getSMaxExpr(Op, getNegativeSCEV(Op, Flags));
}
+/// Build the SCEV for signum(Op) by clamping Op into the range [-1, 1]:
+/// first bound it from below with a signed max against -1, then bound that
+/// result from above with a signed min against 1.
+const SCEV *ScalarEvolution::getSignumExpr(const SCEV *Op) {
+  Type *OpTy = Op->getType();
+  // Everything below -1 collapses to -1.
+  const SCEV *ClampedBelow = getSMaxExpr(Op, getMinusOne(OpTy));
+  // Everything above 1 collapses to 1.
+  return getSMinExpr(ClampedBelow, getOne(OpTy));
+}
+
const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
Value *RHS;
bool IsNSW = false;
bool IsNUW = false;
+ bool IsExact = false;
/// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
/// constant expression.
IsNSW = OBO->hasNoSignedWrap();
IsNUW = OBO->hasNoUnsignedWrap();
}
+ if (auto *PEO = dyn_cast<PossiblyExactOperator>(Op))
+ IsExact = PEO->isExact();
}
explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
- bool IsNUW = false)
- : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
+ bool IsNUW = false, bool IsExact = false)
+ : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW),
+ IsExact(IsExact) {} // IsExact is a trailing defaulted parameter, so calls that omit it get IsExact == false.
};
} // end anonymous namespace
}
}
}
+ if (BO->IsExact) {
+ // Given an exact arithmetic right-shift by an in-bounds constant C,
+ // we can lower it into: (abs(x) EXACT/u (1<<C)) * signum(x)
+ const SCEV *X = getSCEV(BO->LHS);
+ const SCEV *AbsX = getAbsExpr(X, /*IsNSW=*/false);
+ APInt Mult = APInt::getOneBitSet(BitWidth, AShrAmt);
+ const SCEV *Div = getUDivExactExpr(AbsX, getConstant(Mult));
+ return getMulExpr(Div, getSignumExpr(X), SCEV::FlagNSW);
+ }
break;
}
}
; ALL-LABEL: 't3'
; ALL-NEXT: Classifying expressions for: @t3
; ALL-NEXT: %i0 = ashr exact i32 %x, 4
-; ALL-NEXT: --> %i0 U: full-set S: [-134217728,134217728)
+; ALL-NEXT: --> ((((-1 * %x) smax %x) /u 16) * (1 smin (-1 smax %x)))<nsw> U: [-268435455,268435456) S: [-268435455,268435456)
; ALL-NEXT: Determining loop execution counts for: @t3
;
%i0 = ashr exact i32 %x, 4
; ALL-LABEL: 't5'
; ALL-NEXT: Classifying expressions for: @t5
; ALL-NEXT: %i0 = ashr exact i32 %x, 5
-; ALL-NEXT: --> %i0 U: full-set S: [-67108864,67108864)
+; ALL-NEXT: --> ((((-1 * %x) smax %x) /u 32) * (1 smin (-1 smax %x)))<nsw> U: [-134217727,134217728) S: [-134217727,134217728)
; ALL-NEXT: Determining loop execution counts for: @t5
;
%i0 = ashr exact i32 %x, 5
; for (int* cur = start; cur != end; ++cur)
; other[cur - start] += *cur;
; }
+;
+; FIXME: 4 * (%i10 EXACT/s 4) is just %i10
define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) {
; X64-LABEL: 'pr46786_c26_int'
; X64-NEXT: Classifying expressions for: @pr46786_c26_int
; X64-NEXT: %i10 = sub i64 %i9, %i4
; X64-NEXT: --> ((-1 * %i4) + %i9) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X64-NEXT: %i11 = ashr exact i64 %i10, 2
-; X64-NEXT: --> %i11 U: full-set S: [-2305843009213693952,2305843009213693952) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
+; X64-NEXT: --> (((((-1 * %i4) + %i9) smax ((-1 * %i9) + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4) + %i9))))<nsw> U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X64-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11
-; X64-NEXT: --> ((4 * %i11)<nsw> + %arg2)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
+; X64-NEXT: --> ((4 * ((((-1 * %i4) + %i9) smax ((-1 * %i9) + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4) + %i9)))) + %arg2)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X64-NEXT: %i13 = load i32, i32* %i12, align 4
; X64-NEXT: --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X64-NEXT: %i14 = add nsw i32 %i13, %i8
; X32-NEXT: %i10 = sub i64 %i9, %i4
; X32-NEXT: --> ((-1 * %i4)<nsw> + %i9) U: [-4294967295,4294967296) S: [-8589934591,8589934592) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X32-NEXT: %i11 = ashr exact i64 %i10, 2
-; X32-NEXT: --> %i11 U: full-set S: [-2147483648,2147483648) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
+; X32-NEXT: --> (((((-1 * %i4)<nsw> + %i9) smax ((-1 * %i9)<nsw> + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4)<nsw> + %i9))))<nsw> U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X32-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11
-; X32-NEXT: --> ((4 * (trunc i64 %i11 to i32))<nsw> + %arg2)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
+; X32-NEXT: --> ((4 * (trunc i64 (((((-1 * %i4)<nsw> + %i9) smax ((-1 * %i9)<nsw> + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4)<nsw> + %i9))))<nsw> to i32))<nsw> + %arg2)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X32-NEXT: %i13 = load i32, i32* %i12, align 4
; X32-NEXT: --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X32-NEXT: %i14 = add nsw i32 %i13, %i8