[SLP]Improve isUndefVector function by adding insertelement analysis.

author Alexey Bataev <a.bataev@outlook.com>

Wed, 14 Sep 2022 19:28:31 +0000 (12:28 -0700)

committer Alexey Bataev <a.bataev@outlook.com>

Fri, 16 Sep 2022 21:36:38 +0000 (14:36 -0700)
author Alexey Bataev <a.bataev@outlook.com>
Wed, 14 Sep 2022 19:28:31 +0000 (12:28 -0700)
committer Alexey Bataev <a.bataev@outlook.com>
Fri, 16 Sep 2022 21:36:38 +0000 (14:36 -0700)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

index 39033fa..374e2c2 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -319,20 +319,35 @@ static Optional<unsigned> getInsertIndex(const Value *InsertInst,
  }
  
  /// Checks if the given value is actually an undefined constant vector.
-static bool isUndefVector(const Value *V) {
+/// Also, if the \p ShuffleMask is not empty, tries to check if the non-masked
+/// elements actually mask the insertelement buildvector, if any.
+static bool isUndefVector(const Value *V, ArrayRef<int> ShuffleMask = None) {
    if (isa<UndefValue>(V))
      return true;
-  auto *C = dyn_cast<Constant>(V);
-  if (!C)
-    return false;
-  if (!C->containsUndefOrPoisonElement())
-    return false;
-  auto *VecTy = dyn_cast<FixedVectorType>(C->getType());
+  auto *VecTy = dyn_cast<FixedVectorType>(V->getType());
    if (!VecTy)
      return false;
+  auto *C = dyn_cast<Constant>(V);
+  if (!C) {
+    if (!ShuffleMask.empty()) {
+      const Value *Base = V;
+      while (auto *II = dyn_cast<InsertElementInst>(Base)) {
+        Base = II->getOperand(0);
+        Optional<unsigned> Idx = getInsertIndex(II);
+        if (!Idx)
+          continue;
+        if (*Idx < ShuffleMask.size() && ShuffleMask[*Idx] == UndefMaskElem)
+          return false;
+      }
+      return V != Base && isUndefVector(Base);
+    }
+    return false;
+  }
    for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
      if (Constant *Elem = C->getAggregateElement(I))
-      if (!isa<UndefValue>(Elem))
+      if (!isa<UndefValue>(Elem) &&
+          (ShuffleMask.empty() ||
+           (I < ShuffleMask.size() && ShuffleMask[I] == UndefMaskElem)))
          return false;
    }
    return true;
@@ -6360,8 +6375,10 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
        // initial vector or inserting a subvector.
        // TODO: Implement the analysis of the FirstInsert->getOperand(0)
        // subvector of ActualVecTy.
-      if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts &&
-          !IsWholeSubvector) {
+      SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+      copy(Mask, std::next(InsertMask.begin(), OffsetBeg));
+      if (!isUndefVector(FirstInsert->getOperand(0), InsertMask) &&
+          NumScalars != NumElts && !IsWholeSubvector) {
          if (InsertVecSz != VecSz) {
            auto *ActualVecTy =
                FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
@@ -7056,7 +7073,7 @@ static T *performExtractsShuffleAction(
    SmallVector<int> Mask(ShuffleMask.begin()->second);
    auto VMIt = std::next(ShuffleMask.begin());
    T *Prev = nullptr;
-  bool IsBaseNotUndef = !isUndefVector(Base);
+  bool IsBaseNotUndef = !isUndefVector(Base, Mask);
    if (IsBaseNotUndef) {
      // Base is not undef, need to combine it with the next subvectors.
      std::pair<T *, bool> Res =
@@ -8106,14 +8123,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
          }
        }
  
-      if ((!IsIdentity || Offset != 0 ||
-           !isUndefVector(FirstInsert->getOperand(0))) &&
-          NumElts != NumScalars) {
-        SmallVector<int> InsertMask(NumElts);
-        std::iota(InsertMask.begin(), InsertMask.end(), 0);
+      SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+      for (unsigned I = 0; I < NumElts; I++) {
+        if (Mask[I] != UndefMaskElem)
+          InsertMask[Offset + I] = NumElts + I;
+      }
+      if (Offset != 0 ||
+          !isUndefVector(FirstInsert->getOperand(0), InsertMask)) {
          for (unsigned I = 0; I < NumElts; I++) {
-          if (Mask[I] != UndefMaskElem)
-            InsertMask[Offset + I] = NumElts + I;
+          if (InsertMask[I] == UndefMaskElem)
+            InsertMask[I] = I;
          }
  
          V = Builder.CreateShuffleVector(
@@ -8792,8 +8811,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
            if (IsIdentityMask(Mask, cast<FixedVectorType>(SV->getType())) ||
                SV->isZeroEltSplat())
              break;
-          bool IsOp1Undef = isUndefVector(SV->getOperand(0));
-          bool IsOp2Undef = isUndefVector(SV->getOperand(1));
+          bool IsOp1Undef = isUndefVector(SV->getOperand(0), Mask);
+          bool IsOp2Undef = isUndefVector(SV->getOperand(1), Mask);
            if (!IsOp1Undef && !IsOp2Undef)
              break;
            SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
@@ -8813,7 +8832,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
                            &CombineMasks](Value *V1, Value *V2,
                                           ArrayRef<int> Mask) -> Value * {
      assert(V1 && "Expected at least one vector value.");
-    if (V2 && !isUndefVector(V2)) {
+    if (V2 && !isUndefVector(V2, Mask)) {
        // Peek through shuffles.
        Value *Op1 = V1;
        Value *Op2 = V2;
@@ -8841,8 +8860,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
              if (SV1->getOperand(0)->getType() ==
                      SV2->getOperand(0)->getType() &&
                  SV1->getOperand(0)->getType() != SV1->getType() &&
-                isUndefVector(SV1->getOperand(1)) &&
-                isUndefVector(SV2->getOperand(1))) {
+                isUndefVector(SV1->getOperand(1), CombinedMask1) &&
+                isUndefVector(SV2->getOperand(1), CombinedMask2)) {
                Op1 = SV1->getOperand(0);
                Op2 = SV2->getOperand(0);
                SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll

index 281169f..82a42a7 100644 (file)
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -12,7 +12,7 @@ define void @test() {
  ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
  ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
  ; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr null, align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 undef>
  ; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr null, align 4
  ; CHECK-NEXT:    ret void
  ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll

index 36347b4..b9662c8 100644 (file)
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
@@ -53,11 +53,9 @@ define { <2 x float>, <2 x float> } @test1(i32 %conv.i32.i.i.i) {
  ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer
  ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer
  ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0
-; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP7]], 0
+; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP8]], 1
  ; CHECK-NEXT:    ret { <2 x float>, <2 x float> } zeroinitializer
  ;
  entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

index 09487d5..5be933e 100644 (file)
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -44,8 +44,7 @@ define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
  ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
  ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
  ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 undef, i32 3>
-; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 6, i32 15>
-; CHECK-NEXT:    ret <8 x float> [[RD1]]
+; CHECK-NEXT:    ret <8 x float> [[TMP3]]
  ;
    %c0 = extractelement <4 x i32> %c, i32 0
    %c1 = extractelement <4 x i32> %c, i32 1
author	Alexey Bataev <a.bataev@outlook.com>
	Wed, 14 Sep 2022 19:28:31 +0000 (12:28 -0700)
committer	Alexey Bataev <a.bataev@outlook.com>
	Fri, 16 Sep 2022 21:36:38 +0000 (14:36 -0700)
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll		patch \| blob \| history
llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll		patch \| blob \| history
llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll		patch \| blob \| history