From a1b18946f9af130a14655721653beb4510fde9be Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 4 Jan 2023 08:32:25 -0800 Subject: [PATCH] [SLP]Fix incorrect shuffle results because of missing shuffle mask analysis. The mask of the shuffle instruction itself was not taken into account when analyzing that instruction's operands while peeking through shuffle instructions. The use masks for the operands are now built from the incoming mask mapped through the shuffle's own mask. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 28 ++++++++++++++++++---- .../SLPVectorizer/X86/peek-through-shuffle.ll | 2 +- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 947c456..fe2e014f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6405,20 +6405,26 @@ protected: if (auto *SVOpTy = dyn_cast(SV->getOperand(0)->getType())) LocalVF = SVOpTy->getNumElements(); + SmallVector ExtMask(Mask.size(), UndefMaskElem); + for (auto [Idx, I] : enumerate(Mask)) { + if (I == UndefMaskElem) + continue; + ExtMask[Idx] = SV->getMaskValue(I); + } bool IsOp1Undef = isUndefVector(SV->getOperand(0), - buildUseMask(LocalVF, Mask, UseMask::FirstArg)) + buildUseMask(LocalVF, ExtMask, UseMask::FirstArg)) .all(); bool IsOp2Undef = isUndefVector(SV->getOperand(1), - buildUseMask(LocalVF, Mask, UseMask::SecondArg)) + buildUseMask(LocalVF, ExtMask, UseMask::SecondArg)) .all(); if (!IsOp1Undef && !IsOp2Undef) { // Update mask and mark undef elems. for (auto [Idx, I] : enumerate(Mask)) { if (I == UndefMaskElem) continue; - if (SV->getShuffleMask()[I % SV->getShuffleMask().size()] == + if (SV->getMaskValue(I % SV->getShuffleMask().size()) == UndefMaskElem) I = UndefMaskElem; } @@ -6495,14 +6501,26 @@ protected: // again. 
if (auto *SV1 = dyn_cast(Op1)) if (auto *SV2 = dyn_cast(Op2)) { + SmallVector ExtMask1(Mask.size(), UndefMaskElem); + for (auto [Idx, I] : enumerate(CombinedMask1)) { + if (I == UndefMaskElem) + continue; + ExtMask1[Idx] = SV1->getMaskValue(I); + } SmallBitVector UseMask1 = buildUseMask( cast(SV1->getOperand(1)->getType()) ->getNumElements(), - CombinedMask1, UseMask::FirstArg); + ExtMask1, UseMask::SecondArg); + SmallVector ExtMask2(CombinedMask2.size(), UndefMaskElem); + for (auto [Idx, I] : enumerate(CombinedMask2)) { + if (I == UndefMaskElem) + continue; + ExtMask2[Idx] = SV2->getMaskValue(I); + } SmallBitVector UseMask2 = buildUseMask( cast(SV2->getOperand(1)->getType()) ->getNumElements(), - CombinedMask2, UseMask::FirstArg); + ExtMask2, UseMask::SecondArg); if (SV1->getOperand(0)->getType() == SV2->getOperand(0)->getType() && SV1->getOperand(0)->getType() != SV1->getType() && diff --git a/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll index f9e0e4f..047a0d4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll @@ -9,7 +9,7 @@ define void @foo(ptr %0, <4 x float> %1) { ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> zeroinitializer, <4 x float> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP1:%.*]], <4 x float> zeroinitializer, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = fpext <4 x float> [[TMP7]] to <4 x double> ; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[TMP0:%.*]], align 32 ; CHECK-NEXT: ret void -- 2.7.4