From 83cca94194b14aae5e0fd8d3feca9292a631578e Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 3 Jul 2019 18:18:36 +0000 Subject: [PATCH] [LFTR] Hoist extend expressions outside of loops w/o waiting for LICM The motivation for this is two fold: 1) Make the output (and thus tests) a bit more readable to a human trying to understand the result of the transform 2) Reduce spurious diffs in a potential future change to restructure all of this logic to use SCEVExpander (which hoists by default) llvm-svn: 365066 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 5 +- llvm/test/Transforms/IndVarSimplify/elim-extend.ll | 2 +- .../Transforms/IndVarSimplify/iv-widen-elim-ext.ll | 162 ++++++++++++++------- llvm/test/Transforms/IndVarSimplify/iv-widen.ll | 4 +- llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll | 2 +- llvm/test/Transforms/IndVarSimplify/lftr.ll | 6 +- .../Transforms/IndVarSimplify/ult-sub-to-eq.ll | 2 +- 7 files changed, 124 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 4d30160..4f939b2 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2511,7 +2511,10 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, } } - if (!Extended) + if (Extended) { + bool Discard; + L->makeLoopInvariant(ExitCnt, Discard); + } else CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), "lftr.wideiv"); } diff --git a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll index 7fda91c..809ea9b 100644 --- a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll +++ b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll @@ -60,6 +60,7 @@ define void @postincVarIV(i8* %base, i32 %init, i32 %limit) nounwind { ; CHECK-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[RETURN:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INIT]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[LIMIT]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] @@ -70,7 +71,6 @@ define void @postincVarIV(i8* %base, i32 %init, i32 %limit) nounwind { ; CHECK-NEXT: store i8 0, i8* [[POSTADR]] ; CHECK-NEXT: [[POSTADRNSW:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: store i8 0, i8* [[POSTADRNSW]] -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[LIMIT]] to i64 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll index 08804ad..459d6e1 100644 --- a/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll +++ b/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll @@ -9,31 +9,31 @@ target datalayout = "e-m:e-i64:64-p:64:64:64-n8:16:32:64-S128" define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %N -; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: br label %for.body +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %B, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* %C, i64 [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TRUNC0:%.*]] = trunc i64 [[TMP1]] to i32 -; CHECK-NEXT: [[DIV0:%.*]] = udiv i32 5, [[TRUNC0]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[DIV0:%.*]] = udiv i32 5, [[TMP3]] ; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD3]], [[DIV0]] -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX5]], align 4 -; CHECK-NEXT: br label %for.inc +; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %N to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] ; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: br label %for.end +; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -76,28 +76,28 @@ for.end: ; preds = %for.cond.for.end_cr define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) { ; CHECK-LABEL: @foo1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %N -; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: br label %for.body +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %B, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* %C, i64 [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[INDVARS_IV]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4 -; CHECK-NEXT: br label %for.inc +; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %N to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] ; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: br label %for.end +; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -142,13 +142,14 @@ for.end: ; preds = %for.cond.for.end_cr define i32 @foo2(i32 %M) { ; CHECK-LABEL: @foo2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %M -; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[M:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 %M to i64 -; CHECK-NEXT: br label %for.body +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[M]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_LR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 [[INDVARS_IV]] @@ -157,14 +158,13 @@ define i32 @foo2(i32 %M) { ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]] ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[TMP3]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX5]], align 4 -; CHECK-NEXT: br label %for.inc +; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %M to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] ; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: br label %for.end +; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: [[CALL:%.*]] = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0)) ; CHECK-NEXT: ret i32 0 @@ -210,13 +210,14 @@ declare i32 @dummy(i32*, i32*) define i32 @foo3(i32 %M) { ; CHECK-LABEL: @foo3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %M -; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[M:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 %M to i64 -; CHECK-NEXT: br label %for.body +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[M]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV:%.*]].next, %for.inc ], [ 0, %for.body.lr.ph ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_LR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 [[INDVARS_IV]] @@ -227,14 +228,13 @@ define i32 @foo3(i32 %M) { ; CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP4]] to i64 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[IDXPROM4]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX5]], align 4 -; CHECK-NEXT: br label %for.inc +; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %M to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] ; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: br label %for.end +; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: [[CALL:%.*]] = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0)) ; CHECK-NEXT: ret i32 0 @@ -276,6 +276,38 @@ for.end: ; preds = %for.cond.for.end_cr %struct.image = type {i32, i32} define i32 @foo4(%struct.image* %input, i32 %length, i32* %in) { +; CHECK-LABEL: @foo4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STRIDE:%.*]] = getelementptr inbounds [[STRUCT_IMAGE:%.*]], %struct.image* [[INPUT:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[STRIDE]], align 4 +; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[LENGTH:%.*]], 1 +; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[CHANNEL:%.*]] = getelementptr inbounds [[STRUCT_IMAGE]], %struct.image* [[INPUT]], i64 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[CHANNEL]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw i64 [[TMP5]], [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ADD_PTR]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = mul nsw i64 [[TMP1]], [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ADD_PTR1]], align 4 +; CHECK-NEXT: [[TMP10]] = add i32 [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]] +; entry: %stride = getelementptr inbounds %struct.image, %struct.image* %input, i64 0, i32 1 %0 = load i32, i32* %stride, align 4 @@ -296,8 +328,6 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; mul instruction below is widened instead of generating a truncate instruction for it ; regardless if Load operand of mul is inside or outside the loop (we have both cases). -; CHECK: for.body: -; CHECK-NOT: trunc for.body: ; preds = %for.body.lr.ph, %for.body %x.018 = phi i32 [ 1, %for.body.lr.ph ], [ %add, %for.body ] %add = add nuw nsw i32 %x.018, 1 @@ -317,6 +347,40 @@ for.body: ; preds = %for.body.lr.ph, %fo define i32 @foo5(%struct.image* %input, i32 %length, i32* %in) { +; CHECK-LABEL: @foo5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STRIDE:%.*]] = getelementptr inbounds [[STRUCT_IMAGE:%.*]], %struct.image* [[INPUT:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[STRIDE]], align 4 +; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[LENGTH:%.*]], 1 +; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[CHANNEL:%.*]] = getelementptr inbounds [[STRUCT_IMAGE]], %struct.image* [[INPUT]], i64 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[CHANNEL]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL]] to i64 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ADD_PTR]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i64 [[TMP1]], [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ADD_PTR1]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[MUL]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]] +; entry: %stride = getelementptr inbounds %struct.image, %struct.image* %input, i64 0, i32 1 %0 = load i32, i32* %stride, align 4 @@ -338,8 +402,6 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; This example is the same as above except that the first mul is used in two places ; and this may result in having two versions of the multiply: an i32 and i64 version. ; In this case, keep the trucate instructions to avoid this redundancy. -; CHECK: for.body: -; CHECK: trunc for.body: ; preds = %for.body.lr.ph, %for.body %x.018 = phi i32 [ 1, %for.body.lr.ph ], [ %add, %for.body ] %add = add nuw nsw i32 %x.018, 1 diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll index a8d89b1..9370bcd 100644 --- a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll +++ b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll @@ -173,6 +173,7 @@ define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8 %tmp1) { ; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]] ; CHECK: for.body2.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: br label [[FOR_BODY2:%.*]] ; CHECK: for.body2: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[FOR_BODY2_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY2]] ] @@ -180,12 +181,12 @@ define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8 %tmp1) { ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BC0]], i64 [[TMP5]] ; CHECK-NEXT: store i8 [[TMP1:%.*]], i8* [[ADD_PTR]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_BODY3_PREHEADER:%.*]] ; CHECK: for.body3.preheader: ; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP4]] to i32 ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT5:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: ; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ 1, [[FOR_BODY3_PREHEADER]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_BODY3]] ] @@ -193,7 +194,6 @@ define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8 %tmp1) { ; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, i8* [[BC0]], i64 [[TMP8]] ; CHECK-NEXT: store i8 [[TMP1]], i8* [[ADD_PTR2]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT5:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT5]] ; CHECK-NEXT: br i1 [[EXITCOND6]], label [[FOR_BODY3]], label [[FOR_INC_LOOPEXIT:%.*]] ; CHECK: for.inc.loopexit: diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll index c380f41..c14b452 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -135,6 +135,7 @@ define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector, ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[RETURN:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[ILEAD:%.*]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[IROW]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[LOOP]] ] @@ -148,7 +149,6 @@ define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector, ; CHECK-NEXT: call void @use(double [[V2]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[IROW]] to i64 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[RETURN_LOOPEXIT:%.*]] ; CHECK: return.loopexit: diff --git a/llvm/test/Transforms/IndVarSimplify/lftr.ll b/llvm/test/Transforms/IndVarSimplify/lftr.ll index 9f0172f..b72778b 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr.ll @@ -429,6 +429,7 @@ define float @wide_trip_count_test2(float* %a, ; CHECK-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[M:%.*]], 500 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 500, [[FOR_BODY_PREHEADER]] ] @@ -440,7 +441,6 @@ define float @wide_trip_count_test2(float* %a, ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TEMP]], [[TEMP1]] ; CHECK-NEXT: [[ADD]] = fadd float [[SUM_07]], [[MUL]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: @@ -488,6 +488,7 @@ define float @wide_trip_count_test3(float* %b, ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[M:%.*]], -10 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[M]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -10, [[FOR_BODY_PREHEADER]] ] @@ -500,7 +501,6 @@ define float @wide_trip_count_test3(float* %b, ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[TEMP]] ; CHECK-NEXT: [[ADD1]] = fadd float [[SUM_07]], [[MUL]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[M]] to i64 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: @@ -547,6 +547,7 @@ define float @wide_trip_count_test4(float* %b, ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[M:%.*]], 10 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 10, [[FOR_BODY_PREHEADER]] ] @@ -559,7 +560,6 @@ define float @wide_trip_count_test4(float* %b, ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[TEMP]] ; CHECK-NEXT: [[ADD1]] = fadd float [[SUM_07]], [[MUL]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: diff --git a/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll b/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll index 0bd5064..3650877 100644 --- a/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll +++ b/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll @@ -12,6 +12,7 @@ define void @test1(float* nocapture %autoc, float* nocapture %data, float %d, i3 ; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[DATA_LEN]], [[SAMPLE]] ; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SUB]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] @@ -26,7 +27,6 @@ define void @test1(float* nocapture %autoc, float* nocapture %data, float %d, i3 ; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[TMP2]], [[MUL]] ; CHECK-NEXT: store float [[ADD3]], float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SUB]] to i64 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: -- 2.7.4