From b6a0faaa0c793aede7911be241b1895a9ebea41c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 15 Sep 2019 13:03:24 +0000 Subject: [PATCH] [SLP] limit vectorization of Constant subclasses (PR33958) This is a fix for: https://bugs.llvm.org/show_bug.cgi?id=33958 It seems universally true that we would not want to transform this kind of sequence on any target, but if that's not correct, then we could view this as a target-specific cost model problem. We could also white-list ConstantInt, ConstantFP, etc. rather than blacklist Global and ConstantExpr. Differential Revision: https://reviews.llvm.org/D67362 llvm-svn: 371931 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 +++++-- .../Transforms/SLPVectorizer/X86/consecutive-access.ll | 15 +++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7fbcb23..c18972c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -194,10 +194,13 @@ static bool allSameBlock(ArrayRef<Value *> VL) { return true; } -/// \returns True if all of the values in \p VL are constants. +/// \returns True if all of the values in \p VL are constants (but not +/// globals/constant expressions). static bool allConstant(ArrayRef<Value *> VL) { + // Constant expressions and globals can't be vectorized like normal integer/FP + // constants. 
for (Value *i : VL) - if (!isa<Constant>(i)) + if (!isa<Constant>(i) || isa<ConstantExpr>(i) || isa<GlobalValue>(i)) return false; return true; } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll index b77c010..56a7063 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll @@ -549,14 +549,17 @@ for.body: ; preds = %entry, %for.body br i1 %cmp, label %for.body, label %for.cond.cleanup } +; Globals/constant expressions are not normal constants. +; They should not be treated as the usual vectorization candidates. + @g1 = external global i32, align 4 @g2 = external global i32, align 4 define void @PR33958(i32** nocapture %p) { ; CHECK-LABEL: @PR33958( -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32*, i32** [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32** [[P]] to <2 x i32*>* -; CHECK-NEXT: store <2 x i32*> <i32* @g1, i32* @g2>, <2 x i32*>* [[TMP1]], align 8 +; CHECK-NEXT: store i32* @g1, i32** [[P:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32*, i32** [[P]], i64 1 +; CHECK-NEXT: store i32* @g2, i32** [[ARRAYIDX1]], align 8 ; CHECK-NEXT: ret void ; store i32* @g1, i32** %p, align 8 @@ -567,9 +570,9 @@ define void @PR33958(i32** nocapture %p) { define void @store_constant_expression(i64* %p) { ; CHECK-LABEL: @store_constant_expression( -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> <i64 ptrtoint (i32* @g1 to i64), i64 ptrtoint (i32* @g2 to i64)>, <2 x i64>* [[TMP1]], align 8 +; CHECK-NEXT: store i64 ptrtoint (i32* @g1 to i64), i64* [[P:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1 +; CHECK-NEXT: store i64 ptrtoint (i32* @g2 to i64), i64* [[ARRAYIDX1]], align 8 ; CHECK-NEXT: ret void ; store i64 ptrtoint (i32* @g1 to i64), i64* %p, align 8 -- 2.7.4