From ab9a81f736acfb927b0e0b4f0de8710fc2379f70 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 21 Sep 2022 10:19:43 -0700 Subject: [PATCH] [SLP]Try to emit canonical shuffle with undef operand. In the canonical form of the shuffle the poison/undef operand is the second operand; the patch tries to emit the canonical form for partial vectorization of the buildvector sequence. Also, this patch starts emitting a freeze instruction for shuffles with undef indices if the second shuffle operand is undef, not poison. It is an initial step toward D93818, where undef mask elements are treated as returning a poison value. Differential Revision: https://reviews.llvm.org/D134377 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 46 +++++++++++++++------- .../SLPVectorizer/X86/PR35865-inseltpoison.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll | 3 +- .../insert-element-build-vector-inseltpoison.ll | 2 +- .../X86/insert-element-build-vector.ll | 5 ++- 5 files changed, 38 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index faec561..a9d1b89 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -8181,21 +8181,37 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { SmallVector InsertMask(NumElts, UndefMaskElem); for (unsigned I = 0; I < NumElts; I++) { if (Mask[I] != UndefMaskElem) - InsertMask[Offset + I] = NumElts + I; - } - if (Offset != 0 || - !isUndefVector(FirstInsert->getOperand(0), InsertMask)) { - for (unsigned I = 0; I < NumElts; I++) { - if (InsertMask[I] == UndefMaskElem) - InsertMask[I] = I; - } - - V = Builder.CreateShuffleVector( - FirstInsert->getOperand(0), V, InsertMask, - cast(E->Scalars.back())->getName()); - if (auto *I = dyn_cast(V)) { - GatherShuffleSeq.insert(I); - CSEBlocks.insert(I->getParent()); + InsertMask[Offset + I] = I; + } + bool IsFirstUndef = isUndefVector(FirstInsert->getOperand(0), 
InsertMask); + if ((!IsIdentity || Offset != 0 || !IsFirstUndef) && + NumElts != NumScalars) { + if (IsFirstUndef) { + if (!ShuffleVectorInst::isIdentityMask(InsertMask)) { + V = Builder.CreateShuffleVector( + V, InsertMask, cast(E->Scalars.back())->getName()); + if (auto *I = dyn_cast(V)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + // Create freeze for undef values. + if (!isa(FirstInsert->getOperand(0))) + V = Builder.CreateFreeze(V); + } + } else { + for (unsigned I = 0; I < NumElts; I++) { + if (InsertMask[I] == UndefMaskElem) + InsertMask[I] = I; + else + InsertMask[I] += NumElts; + } + V = Builder.CreateShuffleVector( + FirstInsert->getOperand(0), V, InsertMask, + cast(E->Scalars.back())->getName()); + if (auto *I = dyn_cast(V)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } } } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll index 0fc1c7a..660aee2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll @@ -11,7 +11,7 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() { ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll index c8db496..bbeb19e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -11,7 +11,8 @@ define void 
@_Z10fooConvertPDv4_xS0_S0_PKS_() { ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = freeze <8 x i32> [[VECINS_I_5_I1]] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll index 8224ffd..2c635b4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -305,7 +305,7 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 5be933e..6302ef6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -339,8 +339,9 @@ define <4 x float> 
@simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> -; CHECK-NEXT: ret <4 x float> [[RD1]] +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x float> [[RD1]] +; CHECK-NEXT: ret <4 x float> [[TMP19]] ; %c0 = extractelement <4 x i32> %c, i32 0 %c1 = extractelement <4 x i32> %c, i32 1 -- 2.7.4