From 4284afdf9432f7d756f56b0ab21d69191adefa8d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 1 Jan 2021 08:43:33 -0800 Subject: [PATCH] [SLP]Need shrink the load vector after reordering. After merging the shuffles, we cannot rely on the previous shuffle anymore and need to shrink the final shuffle, if it is required. Reported in D92668 Differential Revision: https://reviews.llvm.org/D93967 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 19 +++++++------------ .../SLPVectorizer/X86/shrink_after_reorder.ll | 3 ++- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index abd04702..8d6453f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4260,18 +4260,13 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { if (E->isSame(VL)) { Value *V = vectorizeTree(E); if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) { - // We need to get the vectorized value but without shuffle. - if (auto *SV = dyn_cast(V)) { - V = SV->getOperand(0); - } else { - // Reshuffle to get only unique values. - SmallVector UniqueIdxs; - SmallSet UsedIdxs; - for (int Idx : E->ReuseShuffleIndices) - if (UsedIdxs.insert(Idx).second) - UniqueIdxs.emplace_back(Idx); - V = Builder.CreateShuffleVector(V, UniqueIdxs); - } + // Reshuffle to get only unique values. + SmallVector UniqueIdxs; + SmallSet UsedIdxs; + for (int Idx : E->ReuseShuffleIndices) + if (UsedIdxs.insert(Idx).second) + UniqueIdxs.emplace_back(Idx); + V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle"); } return V; } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll index 110e117..533b48e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -10,7 +10,8 @@ define void @wombat(i32* %ptr, i32* %ptr1) { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], +; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5 -- 2.7.4