}
// Check that we can compute the value of AddRec on the exiting iteration
- // without soundness problems. There are two cases to be worried about:
- // 1) BECount could be 255 with type i8. Simply adding one would be
- // incorrect. We may need one extra bit to represent the unsigned
- // trip count.
- // 2) The multiplication of stride by TC may wrap around. This is subtle
- // because computing the result accounting for wrap is insufficient.
- // In order to use the result in an exit test, we must also know that
- // AddRec doesn't take the same value on any previous iteration.
- // The simplest case to consider is a candidate IV which is narrower
- // than the trip count (and thus original IV), but this can also
- // happen due to non-unit strides on the candidate IVs.
+ // without soundness problems. evaluateAtIteration internally needs
+ // to multiply the stride of the iteration number - which may wrap around.
+ // The issue here is subtle because computing the result accounting for
+ // wrap is insufficient. In order to use the result in an exit test, we
+ // must also know that AddRec doesn't take the same value on any previous
+ // iteration. The simplest case to consider is a candidate IV which is
+ // narrower than the trip count (and thus original IV), but this can
+ // also happen due to non-unit strides on the candidate IVs.
+ // TODO: This check should be replaceable with PostInc->hasNoSelfWrap(),
+ // but in practice we appear to be missing inference for cases we should
+ // be able to catch.
ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE));
ConstantRange BECountCR = SE.getUnsignedRange(BECount);
- unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + 1 + StepCR.getMinSignedBits();
+ unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + StepCR.getMinSignedBits();
unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType());
if (NoOverflowBitWidth > ARBitWidth)
continue;
- const SCEV *TermValueSLocal = SE.getAddExpr(
- AddRec->getOperand(0),
- SE.getTruncateOrZeroExtend(
- SE.getMulExpr(
- AddRec->getOperand(1),
- SE.getTruncateOrZeroExtend(
- SE.getAddExpr(BECount, SE.getOne(BECount->getType())),
- AddRec->getOperand(1)->getType())),
- AddRec->getOperand(0)->getType()));
+ const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
+ const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
if (!Expander.isSafeToExpand(TermValueSLocal)) {
LLVM_DEBUG(
dbgs() << "Is not safe to expand terminating value for phi node" << PN
; CHECK-LABEL: @runtime_tripcount(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 84
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 88
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
; In this case, the i8 IVs increment *isn't* nsw. As a result, a N of 0
; is well defined, and thus the post-inc starts at 255.
-; FIXME: miscompile
define void @wrap_around(ptr %a, i8 %N) {
; CHECK-LABEL: @wrap_around(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[N:%.*]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-LABEL: @ptr_of_ptr_addrec(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[START_PTRPTR:%.*]] = getelementptr ptr, ptr [[PTRPTR:%.*]]
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 8
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[IT_04]], align 8
-; CHECK-NEXT: tail call void @foo(ptr [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[IT_04]], align 8
+; CHECK-NEXT: tail call void @foo(ptr [[TMP4]])
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK-NEXT: [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH]] to i64
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 8
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_04]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = call ptr @foo(ptr [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DST_04]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = call ptr @foo(ptr [[TMP4]])
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]