From 656f1d8b74df5d3f5f2bc75a5f2565df48340757 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 6 Nov 2022 11:40:08 +0000 Subject: [PATCH] Revert "[SLP] Extend reordering data of tree entry to support PHI nodes" This reverts commit 87a20868eb2043420d48f591c3437472f7137834 as it has problems with scalable vectors and use-list orders. Test to follow. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 156 ++++++--------------- .../SLPVectorizer/AMDGPU/phi-result-use-order.ll | 10 +- 2 files changed, 51 insertions(+), 115 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f874cfc..ba44d4a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3795,49 +3795,6 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) { return None; } -/// Check if two insertelement instructions are from the same buildvector. -static bool areTwoInsertFromSameBuildVector( - InsertElementInst *VU, InsertElementInst *V, - function_ref GetBaseOperand) { - // Instructions must be from the same basic blocks. - if (VU->getParent() != V->getParent()) - return false; - // Checks if 2 insertelements are from the same buildvector. - if (VU->getType() != V->getType()) - return false; - // Multiple used inserts are separate nodes. - if (!VU->hasOneUse() && !V->hasOneUse()) - return false; - auto *IE1 = VU; - auto *IE2 = V; - unsigned Idx1 = *getInsertIndex(IE1); - unsigned Idx2 = *getInsertIndex(IE2); - // Go through the vector operand of insertelement instructions trying to find - // either VU as the original vector for IE2 or V as the original vector for - // IE1. - do { - if (IE2 == VU) - return VU->hasOneUse(); - if (IE1 == V) - return V->hasOneUse(); - if (IE1) { - if ((IE1 != VU && !IE1->hasOneUse()) || - getInsertIndex(IE1).value_or(Idx2) == Idx2) - IE1 = nullptr; - else - IE1 = dyn_cast_or_null(GetBaseOperand(IE1)); - } - if (IE2) { - if ((IE2 != V && !IE2->hasOneUse()) || - getInsertIndex(IE2).value_or(Idx1) == Idx1) - IE2 = nullptr; - else - IE2 = dyn_cast_or_null(GetBaseOperand(IE2)); - } - } while (IE1 || IE2); - return false; -} - Optional BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // No need to reorder if need to shuffle reuses, still need to shuffle the @@ -3901,58 +3858,6 @@ Optional BoUpSLP::getReorderingData(const TreeEntry &TE, (TopToBottom && isa(TE.getMainOp()))) && !TE.isAltShuffle()) return TE.ReorderIndices; - if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) { - auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) { - if (V1->user_empty() || V2->user_empty()) - return false; - auto *FirstUserOfPhi1 = cast(*V1->user_begin()); - auto *FirstUserOfPhi2 = cast(*V2->user_begin()); - if (auto *IE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *IE2 = dyn_cast(FirstUserOfPhi2)) { - if (!areTwoInsertFromSameBuildVector( - IE1, IE2, - [](InsertElementInst *II) { return II->getOperand(0); })) - return false; - Optional Idx1 = getInsertIndex(IE1); - Optional Idx2 = getInsertIndex(IE2); - if (Idx1 == None || Idx2 == None) - return false; - return *Idx1 < *Idx2; - } - if (auto *EE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *EE2 = dyn_cast(FirstUserOfPhi2)) { - if (EE1->getOperand(0) != EE2->getOperand(0)) - return false; - Optional Idx1 = getExtractIndex(EE1); - Optional Idx2 = getExtractIndex(EE2); - if (Idx1 == None || Idx2 == None) - return false; - return *Idx1 < *Idx2; - } - return false; - }; - auto IsIdentityOrder = [](const OrdersType &Order) { - for (unsigned Idx : seq(0, Order.size())) - if (Idx != Order[Idx]) - return false; - return true; - }; - if (!TE.ReorderIndices.empty()) - return TE.ReorderIndices; - DenseMap PhiToId; - SmallVector Phis; - OrdersType ResOrder(TE.Scalars.size()); - for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) { - PhiToId[TE.Scalars[Id]] = Id; - Phis.push_back(TE.Scalars[Id]); - } - llvm::stable_sort(Phis, PHICompare); - for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id) - ResOrder[Id] = PhiToId[Phis[Id]]; - if (IsIdentityOrder(ResOrder)) - return {}; - return ResOrder; - } if (TE.State == TreeEntry::NeedToGather) { // TODO: add analysis of other gather nodes with extractelement // instructions and other values/instructions, not only undefs. @@ -4030,9 +3935,6 @@ void BoUpSLP::reorderTopToBottom() { // their ordering. DenseMap GathersToOrders; - // Phi nodes can have preferred ordering based on their result users - DenseMap PhisToOrders; - // AltShuffles can also have a preferred ordering that leads to fewer // instructions, e.g., the addsub instruction in x86. DenseMap AltShufflesToOrders; @@ -4047,7 +3949,7 @@ void BoUpSLP::reorderTopToBottom() { // extracts. for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries, &GathersToOrders, &ExternalUserReorderMap, - &AltShufflesToOrders, &PhisToOrders]( + &AltShufflesToOrders]( const std::unique_ptr &TE) { // Look for external users that will probably be vectorized. SmallVector ExternalUserReorderIndices = @@ -4104,9 +4006,6 @@ void BoUpSLP::reorderTopToBottom() { VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get()); if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty()) GathersToOrders.try_emplace(TE.get(), *CurrentOrder); - if (TE->State == TreeEntry::Vectorize && - TE->getOpcode() == Instruction::PHI) - PhisToOrders.try_emplace(TE.get(), *CurrentOrder); } }); @@ -4132,8 +4031,8 @@ void BoUpSLP::reorderTopToBottom() { if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE)) continue; // Count number of orders uses. - const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders, - &PhisToOrders]() -> const OrdersType & { + const auto &Order = [OpTE, &GathersToOrders, + &AltShufflesToOrders]() -> const OrdersType & { if (OpTE->State == TreeEntry::NeedToGather || !OpTE->ReuseShuffleIndices.empty()) { auto It = GathersToOrders.find(OpTE); @@ -4145,12 +4044,6 @@ void BoUpSLP::reorderTopToBottom() { if (It != AltShufflesToOrders.end()) return It->second; } - if (OpTE->State == TreeEntry::Vectorize && - isa(OpTE->getMainOp())) { - auto It = PhisToOrders.find(OpTE); - if (It != PhisToOrders.end()) - return It->second; - } return OpTE->ReorderIndices; }(); // First consider the order of the external scalar users. @@ -7245,6 +7138,49 @@ InstructionCost BoUpSLP::getSpillCost() const { return Cost; } +/// Check if two insertelement instructions are from the same buildvector. +static bool areTwoInsertFromSameBuildVector( + InsertElementInst *VU, InsertElementInst *V, + function_ref GetBaseOperand) { + // Instructions must be from the same basic blocks. + if (VU->getParent() != V->getParent()) + return false; + // Checks if 2 insertelements are from the same buildvector. + if (VU->getType() != V->getType()) + return false; + // Multiple used inserts are separate nodes. + if (!VU->hasOneUse() && !V->hasOneUse()) + return false; + auto *IE1 = VU; + auto *IE2 = V; + unsigned Idx1 = *getInsertIndex(IE1); + unsigned Idx2 = *getInsertIndex(IE2); + // Go through the vector operand of insertelement instructions trying to find + // either VU as the original vector for IE2 or V as the original vector for + // IE1. + do { + if (IE2 == VU) + return VU->hasOneUse(); + if (IE1 == V) + return V->hasOneUse(); + if (IE1) { + if ((IE1 != VU && !IE1->hasOneUse()) || + getInsertIndex(IE1).value_or(Idx2) == Idx2) + IE1 = nullptr; + else + IE1 = dyn_cast_or_null(GetBaseOperand(IE1)); + } + if (IE2) { + if ((IE2 != V && !IE2->hasOneUse()) || + getInsertIndex(IE2).value_or(Idx1) == Idx1) + IE2 = nullptr; + else + IE2 = dyn_cast_or_null(GetBaseOperand(IE2)); + } + } while (IE1 || IE2); + return false; +} + /// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the /// buildvector sequence. static bool isFirstInsertElement(const InsertElementInst *IE1, diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll index f870fb3..5dff4be 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -63,8 +63,8 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1 ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] @@ -73,15 +73,15 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ] ; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> ; CHECK-NEXT: ret <4 x half> [[TMP12]] -- 2.7.4