[SLP] Fix vector element size for the store chains

author Anton Afanasyev <anton.a.afanasyev@gmail.com>

Sun, 13 Dec 2020 11:42:25 +0000 (14:42 +0300)

committer Anton Afanasyev <anton.a.afanasyev@gmail.com>

Mon, 14 Dec 2020 12:51:43 +0000 (15:51 +0300)
author Anton Afanasyev <anton.a.afanasyev@gmail.com>
Sun, 13 Dec 2020 11:42:25 +0000 (14:42 +0300)
committer Anton Afanasyev <anton.a.afanasyev@gmail.com>
Mon, 14 Dec 2020 12:51:43 +0000 (15:51 +0300)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

index c5ba370..e1c1c6e 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6076,7 +6076,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
  
      // If a vector register can't hold 1 element, we are done.
      unsigned MaxVecRegSize = R.getMaxVecRegSize();
-    unsigned EltSize = R.getVectorElementSize(Stores[0]);
+    unsigned EltSize = R.getVectorElementSize(Operands[0]);
      if (MaxVecRegSize % EltSize != 0)
        continue;
  
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll

index 63e3178..2fdef62 100644 (file)
--- a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
@@ -23,28 +23,21 @@ define void @foo(i8* %v0, i8* readonly %v1) {
  ; CHECK-NEXT:    [[T252:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 9
  ; CHECK-NEXT:    [[T292:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 10
  ; CHECK-NEXT:    [[T322:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 11
-; CHECK-NEXT:    [[T19:%.*]] = load i32, i32* [[T14]], align 4
-; CHECK-NEXT:    [[T23:%.*]] = load i32, i32* [[T18]], align 4
-; CHECK-NEXT:    [[T27:%.*]] = load i32, i32* [[T22]], align 4
-; CHECK-NEXT:    [[T30:%.*]] = load i32, i32* [[T26]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[T14]] to <4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
  ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
-; CHECK-NEXT:    [[T20:%.*]] = add nsw i32 [[T19]], 4
-; CHECK-NEXT:    [[T24:%.*]] = add nsw i32 [[T23]], 4
-; CHECK-NEXT:    [[T28:%.*]] = add nsw i32 [[T27]], 6
-; CHECK-NEXT:    [[T31:%.*]] = add nsw i32 [[T30]], 7
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP2]], <i64 4, i64 4>
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 6, i64 7>
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP5]], <2 x i64>* [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP8]], align 8
-; CHECK-NEXT:    store i32 [[T20]], i32* [[T21]], align 4
-; CHECK-NEXT:    store i32 [[T24]], i32* [[T25]], align 4
-; CHECK-NEXT:    store i32 [[T28]], i32* [[T29]], align 4
-; CHECK-NEXT:    store i32 [[T31]], i32* [[T32]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 4, i32 4, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 4, i64 4>
+; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i64> [[TMP6]], <i64 6, i64 7>
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP8]], <2 x i64>* [[TMP10]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[T21]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP12]], align 4
  ; CHECK-NEXT:    ret void
  ;
    %t0 = bitcast i8* %v0 to i32*
author	Anton Afanasyev <anton.a.afanasyev@gmail.com>
	Sun, 13 Dec 2020 11:42:25 +0000 (14:42 +0300)
committer	Anton Afanasyev <anton.a.afanasyev@gmail.com>
	Mon, 14 Dec 2020 12:51:43 +0000 (15:51 +0300)
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll		patch \| blob \| history