From afd9f422e459fffc98b4d36927b92be7c06f54c4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 1 Jul 2022 11:12:00 +0100 Subject: [PATCH] [LV] Update test for #56319 to use interleave group. The original test was over-reduced. It requires an interleave group, so the last vector iteration of the epilogue vector loop doesn't execute. --- ...xit-cond-optimization-epilogue-vectorization.ll | 46 +++++++++++----------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll index bde168c..74b13d3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll @@ -4,60 +4,62 @@ target triple = "x86_64-apple-macos" ; Test case where the exit condition in the main vector loop can be optimized -; to true, but not in the epilogue vector loop. +; to true, but not in the epilogue vector loop. In the test, the interleave +; group requires at least one scalar iteration to execute, meaning the last +; vector iteration of the epilogue vector loop cannot be executed. 
define void @pr56319(ptr noalias %src, ptr noalias %dst) { ; CHECK-LABEL: @pr56319( ; CHECK-NEXT: iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC:%.*]], i64 [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <96 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <96 x i8> [[WIDE_VEC]], <96 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 -; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP4]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: store <32 x i8> [[STRIDED_VEC]], ptr [[TMP4]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 16, 16 -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK-NEXT: br label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label 
[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC]], i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <24 x i8>, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <24 x i8> [[WIDE_VEC2]], <24 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store <8 x i8> [[WIDE_LOAD3]], ptr [[TMP9]], align 1 +; CHECK-NEXT: store <8 x i8> [[STRIDED_VEC3]], ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i64 16, 16 -; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: br label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, 
[[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 24, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [3 x i8], ptr [[SRC]], i64 [[IV]], i64 0 ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 32 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -65,13 +67,13 @@ entry: br label %loop loop: - %iv= phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.src = getelementptr inbounds [3 x i8], ptr %src, i64 %iv, i64 0 %l = load i8, ptr %gep.src, align 1 %gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv store i8 %l, ptr %gep.dst, align 1 %iv.next = add nuw nsw i64 %iv, 1 - %cmp = icmp eq i64 %iv.next, 16 + %cmp = icmp eq i64 %iv.next, 32 br i1 %cmp, label %exit, label %loop exit: -- 2.7.4