From 962c306a11d0a21c884c12e18825b8a41ba1bd7d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 17 Mar 2023 16:26:07 +0000 Subject: [PATCH] [LV] Don't consider pointer as uniform if it is also stored. Update isVectorizedMemAccessUse to also check if the pointer is stored. This prevents LV to incorrectly consider a pointer as uniform if it is used as both pointer and stored by the same StoreInst. Fixes #61396. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 16 +++++++++++----- .../LoopVectorize/consecutive-ptr-uniforms.ll | 13 +++++-------- llvm/test/Transforms/LoopVectorize/opaque-ptr.ll | 22 +++++++++------------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cff8c80..e89eacd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4734,11 +4734,17 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { WideningDecision == CM_Interleave); }; - // Returns true if Ptr is the pointer operand of a memory access instruction - // I, and I is known to not require scalarization. + // I, I is known to not require scalarization, and the pointer is not also + // stored. auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool { - return getLoadStorePointerOperand(I) == Ptr && isUniformDecision(I, VF); + auto GetStoredValue = [I]() -> Value * { + if (!isa(I)) + return nullptr; + return I->getOperand(0); + }; + return getLoadStorePointerOperand(I) == Ptr && isUniformDecision(I, VF) && + GetStoredValue() != Ptr; }; // Holds a list of values which are known to have at least one uniform use. @@ -4784,8 +4790,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { if (isUniformMemOpUse(&I)) addToWorklistIfAllowed(&I); - if (isUniformDecision(&I, VF)) { - assert(isVectorizedMemAccessUse(&I, Ptr) && "consistency check"); + if (isVectorizedMemAccessUse(&I, Ptr)) { + assert(isUniformDecision(&I, VF) && "consistency check"); HasUniformUse.insert(Ptr); } } diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 8431972..29b2c45 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -482,19 +482,16 @@ for.end: ret void } - -; FIXME: Currently %cur.ptr is incorrectly identified as uniform. - ; CHECK-LABEL: pr61396_pointer_used_as_both_stored_value_and_pointer_operand_by_store -; CHECK: LV: Found uniform instruction: %cur.ptr = getelementptr inbounds ptr, ptr %ary, i64 %iv +; CHECK-NOT: LV: Found uniform instruction: %cur.ptr = getelementptr inbounds ptr, ptr %ary, i64 %iv ; CHECK: define void @pr61396_pointer_used_as_both_stored_value_and_pointer_operand_by_store( ; CHECK: vector.body: ; CHECK-NEXT: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds ptr, ptr %ary, i64 %index -; CHECK-NEXT: [[INS:%.+]] = insertelement <4 x ptr> poison, ptr [[GEP]], i64 0 -; CHECK-NEXT: [[SPLAT:%.+]] = shufflevector <4 x ptr> %broadcast.splatinsert, <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: store <4 x ptr> [[SPLAT]], ptr [[GEP]], align 8 +; CHECK-NEXT: [[VEC_IND:%.+]] = phi <4 x i64> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] +; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds ptr, ptr %ary, <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[EXT:%.+]] = extractelement <4 x ptr> [[GEP]], i64 0 +; CHECK-NEXT: store <4 x ptr> [[GEP]], ptr [[EXT]], align 8 ; define void @pr61396_pointer_used_as_both_stored_value_and_pointer_operand_by_store(ptr %ary) { diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll index 5ec6535..bb58173f 100644 --- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll +++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll @@ -57,20 +57,16 @@ define void @store_pointer_induction(ptr %start, ptr %end) { ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[NEXT_GEP3]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store <2 x ptr> [[TMP10]], ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0 +; CHECK-NEXT: store <2 x ptr> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -82,7 +78,7 @@ define void @store_pointer_induction(ptr %start, ptr %end) { ; CHECK-NEXT: store ptr [[IV]], ptr [[IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds ptr, ptr [[IV]], i32 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; -- 2.7.4