Part of D58593.
Compute precise overflow conditions based on all known bits, rather
than just the sign bits. Unsigned a + b overflows iff a > ~b, because
~b is the largest value that can be added to b without wrapping. We
can determine whether this always/never happens from the minimal and
maximal values achievable for a and ~b subject to the known-bits
constraint.
llvm-svn: 355072
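For illustration, a minimal standalone sketch of the new check, using
plain 8-bit masks in place of llvm::APInt/KnownBits (the names Known
and unsignedAddOverflow are invented for this sketch, not LLVM API).
The example input mirrors test_scalar_uadd_never_overflows below: a is
masked with 0x81, so its maximum is 129, and adding 1 can never wrap
an i8.

  #include <cstdint>
  #include <cstdio>

  enum class Overflow { Never, May, Always };

  // Bits known to be zero/one in a value; unknown bits are free.
  struct Known { uint8_t Zero, One; };

  Overflow unsignedAddOverflow(Known A, Known B) {
    uint8_t MinA = A.One;                          // unknown bits all 0
    uint8_t MaxA = static_cast<uint8_t>(~A.Zero);  // unknown bits all 1
    // min/max of ~b under the known bits of b.
    uint8_t MinInvB = B.Zero;
    uint8_t MaxInvB = static_cast<uint8_t>(~B.One);
    if (MaxA <= MinInvB) // a <= ~b for all consistent a, b
      return Overflow::Never;
    if (MinA > MaxInvB)  // a > ~b for all consistent a, b
      return Overflow::Always;
    return Overflow::May;
  }

  int main() {
    Known A{/*Zero=*/0x7e, /*One=*/0x00}; // a = x & 0x81
    Known B{/*Zero=*/0xfe, /*One=*/0x01}; // b = 1 exactly
    std::printf("%d\n", unsignedAddOverflow(A, B) == Overflow::Never); // 1
  }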
bool UseInstrInfo) {
KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
nullptr, UseInstrInfo);
- if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
-
- if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
- // The sign bit is set in both cases: this MUST overflow.
- return OverflowResult::AlwaysOverflows;
- }
-
- if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
- // The sign bit is clear in both cases: this CANNOT overflow.
- return OverflowResult::NeverOverflows;
- }
- }
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ // a + b overflows iff a > ~b. Determine whether this is never/always true
+ // based on the min/max values achievable under the known bits constraint.
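+ // Complementing swaps the masks: min(~b) = ~max(b) and max(~b) = ~min(b),
+ // which is why Zero and One trade places for the RHS below.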
+ APInt MinLHS = LHSKnown.One, MaxLHS = ~LHSKnown.Zero;
+ APInt MinInvRHS = RHSKnown.Zero, MaxInvRHS = ~RHSKnown.One;
+ if (MaxLHS.ule(MinInvRHS))
+ return OverflowResult::NeverOverflows;
+ if (MinLHS.ugt(MaxInvRHS))
+ return OverflowResult::AlwaysOverflows;
return OverflowResult::MayOverflow;
}
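The test updates below follow from the stronger analysis. In the
ripple_nsw tests, the masks bound the operands (e.g. a <= 1 and
b <= 0xBFFF in ripple_nsw1), so a <= ~b holds for every input and nuw
can be inferred alongside the existing nsw. In the uadd.sat tests, a
saturating add that can never overflow folds to a plain add nuw nsw,
and one that always overflows folds directly to the saturated value -1.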
; CHECK-LABEL: @ripple_nsw1(
; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], 1
; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], -16385
-; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[A]], [[B]]
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i16 [[A]], [[B]]
; CHECK-NEXT: ret i16 [[C]]
;
%a = and i16 %y, 1
; CHECK-LABEL: @ripple_nsw2(
; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], 1
; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], -16385
-; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[B]], [[A]]
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i16 [[B]], [[A]]
; CHECK-NEXT: ret i16 [[C]]
;
%a = and i16 %y, 1
; CHECK-LABEL: @ripple_nsw3(
; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], -21845
; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], 21843
-; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[A]], [[B]]
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i16 [[A]], [[B]]
; CHECK-NEXT: ret i16 [[C]]
;
%a = and i16 %y, 43691
; CHECK-LABEL: @ripple_nsw4(
; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], -21845
; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], 21843
-; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[B]], [[A]]
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i16 [[B]], [[A]]
; CHECK-NEXT: ret i16 [[C]]
;
%a = and i16 %y, 43691
; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X:%.*]], 42
; CHECK-NEXT: store i8 [[ADD]], i8* [[P:%.*]], align 1
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[ADD]] to i64
-; CHECK-NEXT: [[R:%.*]] = add nsw i64 [[EXT]], -356
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i64 [[EXT]], -356
; CHECK-NEXT: ret i64 [[R]]
;
%add = add nuw i8 %x, 42
define i8 @test_scalar_uadd_never_overflows(i8 %a) {
; CHECK-LABEL: @test_scalar_uadd_never_overflows(
; CHECK-NEXT: [[A_MASKED:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[A_MASKED]], i8 1)
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i8 [[A_MASKED]], 1
; CHECK-NEXT: ret i8 [[R]]
;
%a_masked = and i8 %a, 129
define <2 x i8> @test_vector_uadd_never_overflows(<2 x i8> %a) {
; CHECK-LABEL: @test_vector_uadd_never_overflows(
; CHECK-NEXT: [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
-; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A_MASKED]], <2 x i8> <i8 1, i8 1>)
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[A_MASKED]], <i8 1, i8 1>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%a_masked = and <2 x i8> %a, <i8 129, i8 129>
define i8 @test_scalar_uadd_always_overflows(i8 %a) {
; CHECK-LABEL: @test_scalar_uadd_always_overflows(
-; CHECK-NEXT: [[A_MASKED:%.*]] = or i8 [[A:%.*]], -64
-; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[A_MASKED]], i8 64)
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 -1
;
%a_masked = or i8 %a, 192
%r = call i8 @llvm.uadd.sat.i8(i8 %a_masked, i8 64)
define <2 x i8> @test_vector_uadd_always_overflows(<2 x i8> %a) {
; CHECK-LABEL: @test_vector_uadd_always_overflows(
-; CHECK-NEXT: [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], <i8 -64, i8 -64>
-; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A_MASKED]], <2 x i8> <i8 64, i8 64>)
-; CHECK-NEXT: ret <2 x i8> [[R]]
+; CHECK-NEXT: ret <2 x i8> <i8 -1, i8 -1>
;
%a_masked = or <2 x i8> %a, <i8 192, i8 192>
%r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_masked, <2 x i8> <i8 64, i8 64>)
define <4 x i8> @or_add(<4 x i8> %v) {
; CHECK-LABEL: @or_add(
; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT: [[T3:%.*]] = add nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
+; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
; CHECK-NEXT: ret <4 x i8> [[T3]]
;
%v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3> ; clear the top bits