From 8361c5da30588d3d4a48eae648f53be1feb5cfad Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 14 Mar 2022 16:54:07 +0100
Subject: [PATCH] [SLPVectorizer] Handle external load/store pointer uses with
 opaque pointers

In this case we may not generate a bitcast, so the new load/store
becomes the external user.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp    | 26 ++++++++++-----------
 .../Transforms/SLPVectorizer/X86/opaque-ptr.ll     | 27 ++++++++++++++++++++++
 2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ffded7a..5bce8bed 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7013,19 +7013,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       unsigned AS = LI->getPointerAddressSpace();
       Value *PO = LI->getPointerOperand();
       if (E->State == TreeEntry::Vectorize) {
         Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
+        NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
 
         // The pointer operand uses an in-tree scalar so we add the new BitCast
-        // to ExternalUses list to make sure that an extract will be generated
-        // in the future.
+        // or LoadInst to ExternalUses list to make sure that an extract will
+        // be generated in the future.
         if (TreeEntry *Entry = getTreeEntry(PO)) {
           // Find which lane we need to extract.
           unsigned FoundLane = Entry->findLaneForValue(PO);
-          ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
+          ExternalUses.emplace_back(
+              PO, PO != VecPtr ? cast<User>(VecPtr) : NewLI, FoundLane);
         }
-
-        NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
       } else {
         assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
         Value *VecPtr = vectorizeTree(E->getOperand(0));
@@ -7058,17 +7057,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       Value *ScalarPtr = SI->getPointerOperand();
       Value *VecPtr = Builder.CreateBitCast(
           ScalarPtr, VecValue->getType()->getPointerTo(AS));
-      StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
-                                                 SI->getAlign());
+      StoreInst *ST =
+          Builder.CreateAlignedStore(VecValue, VecPtr, SI->getAlign());
 
-      // The pointer operand uses an in-tree scalar, so add the new BitCast to
-      // ExternalUses to make sure that an extract will be generated in the
-      // future.
+      // The pointer operand uses an in-tree scalar, so add the new BitCast or
+      // StoreInst to ExternalUses to make sure that an extract will be
+      // generated in the future.
       if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
         // Find which lane we need to extract.
         unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
-        ExternalUses.push_back(
-            ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
+        ExternalUses.push_back(ExternalUser(
+            ScalarPtr, ScalarPtr != VecPtr ? cast<User>(VecPtr) : ST,
+            FoundLane));
       }
 
       Value *V = propagateMetadata(ST, E->Scalars);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
index 4b3304f..80cb197 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
@@ -55,3 +55,30 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 {
   %g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
   ret void
 }
+
+define void @test2(i64* %a, i64* %b) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 1, i64 3>
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[TMP4]], [[TMP6]]
+; CHECK-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    ret void
+;
+  %a1 = getelementptr inbounds i64, i64* %a, i64 1
+  %a2 = getelementptr inbounds i64, i64* %a, i64 2
+  %i1 = ptrtoint i64* %a1 to i64
+  %b3 = getelementptr inbounds i64, i64* %b, i64 3
+  %i2 = ptrtoint i64* %b3 to i64
+  %v1 = load i64, i64* %a1, align 8
+  %v2 = load i64, i64* %a2, align 8
+  %add1 = add i64 %i1, %v1
+  %add2 = add i64 %i2, %v2
+  store i64 %add1, i64* %a1, align 8
+  store i64 %add2, i64* %a2, align 8
+  ret void
+}
-- 
2.7.4