From: Alexey Bataev Date: Fri, 13 May 2022 19:08:08 +0000 (-0700) Subject: [SLP]Do not look for buildvector sequence, if the index is reused. X-Git-Tag: upstream/15.0.7~7766 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=85f6b15ee50feb316047f52d4bd6ddc639e3c5c1;p=platform%2Fupstream%2Fllvm.git [SLP]Do not look for buildvector sequence, if the index is reused. If the insert index was already used or is not constant, we should stop looking for a unique buildvector sequence; it must be split into 2 different buildvectors. --- diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7f7e9e3..b9f86cd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -737,11 +737,11 @@ static void inversePermutation(ArrayRef Indices, /// \returns inserting index of InsertElement or InsertValue instruction, /// using Offset as base offset for index. -static Optional getInsertIndex(Value *InsertInst, +static Optional getInsertIndex(const Value *InsertInst, unsigned Offset = 0) { int Index = Offset; - if (auto *IE = dyn_cast(InsertInst)) { - if (auto *CI = dyn_cast(IE->getOperand(2))) { + if (const auto *IE = dyn_cast(InsertInst)) { + if (const auto *CI = dyn_cast(IE->getOperand(2))) { auto *VT = cast(IE->getType()); if (CI->getValue().uge(VT->getNumElements())) return None; @@ -752,13 +752,13 @@ static Optional getInsertIndex(Value *InsertInst, return None; } - auto *IV = cast(InsertInst); + const auto *IV = cast(InsertInst); Type *CurrentType = IV->getType(); for (unsigned I : IV->indices()) { - if (auto *ST = dyn_cast(CurrentType)) { + if (const auto *ST = dyn_cast(CurrentType)) { Index *= ST->getNumElements(); CurrentType = ST->getElementType(I); - } else if (auto *AT = dyn_cast(CurrentType)) { + } else if (const auto *AT = dyn_cast(CurrentType)) { Index *= AT->getNumElements(); CurrentType = AT->getElementType(); } else { @@ -6556,6 +6556,8 @@ 
static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU, return false; auto *IE1 = VU; auto *IE2 = V; + unsigned Idx1 = *getInsertIndex(IE1); + unsigned Idx2 = *getInsertIndex(IE2); // Go through the vector operand of insertelement instructions trying to find // either VU as the original vector for IE2 or V as the original vector for // IE1. @@ -6563,13 +6565,15 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU, if (IE2 == VU || IE1 == V) return true; if (IE1) { - if (IE1 != VU && !IE1->hasOneUse()) + if ((IE1 != VU && !IE1->hasOneUse()) || + getInsertIndex(IE1).getValueOr(Idx2) == Idx2) IE1 = nullptr; else IE1 = dyn_cast(IE1->getOperand(0)); } if (IE2) { - if (IE2 != V && !IE2->hasOneUse()) + if ((IE2 != V && !IE2->hasOneUse()) || + getInsertIndex(IE2).getValueOr(Idx1) == Idx1) IE2 = nullptr; else IE2 = dyn_cast(IE2->getOperand(0)); @@ -6586,6 +6590,8 @@ static bool isFirstInsertElement(const InsertElementInst *IE1, const auto *I2 = IE2; const InsertElementInst *PrevI1; const InsertElementInst *PrevI2; + unsigned Idx1 = *getInsertIndex(IE1); + unsigned Idx2 = *getInsertIndex(IE2); do { if (I2 == IE1) return true; @@ -6593,9 +6599,11 @@ static bool isFirstInsertElement(const InsertElementInst *IE1, return false; PrevI1 = I1; PrevI2 = I2; - if (I1 && (I1 == IE1 || I1->hasOneUse())) + if (I1 && (I1 == IE1 || I1->hasOneUse()) && + getInsertIndex(I1).getValueOr(Idx2) != Idx2) I1 = dyn_cast(I1->getOperand(0)); - if (I2 && (I2 == IE2 || I2->hasOneUse())) + if (I2 && ((I2 == IE2 || I2->hasOneUse())) && + getInsertIndex(I2).getValueOr(Idx1) != Idx1) I2 = dyn_cast(I2->getOperand(0)); } while ((I1 && PrevI1 != I1) || (I2 && PrevI2 != I2)); llvm_unreachable("Two different buildvectors not expected."); @@ -6764,7 +6772,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { // Find the insertvector, vectorized in tree, if any. 
Value *Base = VU; while (auto *IEBase = dyn_cast(Base)) { - if (IEBase != EU.User && !IEBase->hasOneUse()) + if (IEBase != EU.User && + (!IEBase->hasOneUse() || + getInsertIndex(IEBase).getValueOr(*InsertIdx) == *InsertIdx)) break; // Build the mask for the vectorized insertelement instructions. if (const TreeEntry *E = getTreeEntry(IEBase)) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll new file mode 100644 index 0000000..0d3c780 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +;RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-android23 < %s | FileCheck %s + +define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr undef, i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr undef, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 +; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0 +; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 +; CHECK-NEXT: ret void +; + %1 = getelementptr inbounds float, ptr undef, i32 2 + 
%2 = load float, ptr %1, align 4 + %3 = load float, ptr undef, align 4 + %4 = fsub float %2, %3 + %5 = getelementptr inbounds float, ptr undef, i32 3 + %6 = load float, ptr %5, align 4 + %7 = getelementptr inbounds float, ptr undef, i32 1 + %8 = load float, ptr %7, align 4 + %9 = fsub float %6, %8 + %10 = fcmp olt float %9, %4 + %11 = insertelement <2 x float> undef, float %3, i64 0 + %12 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 + store <2 x float> zeroinitializer, ptr null, align 4 + %13 = insertelement <2 x float> %11, float %6, i64 0 + store <2 x float> zeroinitializer, ptr null, align 4 + ret void +}