LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
<< VL.size() << ".\n");
- // Check that all of the parts are scalar instructions of the same type,
+ // Check that all of the parts are instructions of the same type,
// we permit an alternate opcode via InstructionsState.
InstructionsState S = getSameOpcode(VL);
if (!S.getOpcode())
return false;
Instruction *I0 = cast<Instruction>(S.OpValue);
- unsigned Sz = R.getVectorElementSize(I0);
- unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
- unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
- if (MaxVF < 2) {
- R.getORE()->emit([&]() {
- return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
- << "Cannot SLP vectorize list: vectorization factor "
- << "less than 2 is not supported";
- });
- return false;
- }
-
+ // Make sure invalid types (including vector type) are rejected before
+ // determining vectorization factor for scalar instructions.
for (Value *V : VL) {
Type *Ty = V->getType();
if (!isValidElementType(Ty)) {
}
}
+ unsigned Sz = R.getVectorElementSize(I0);
+ unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+ unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+ if (MaxVF < 2) {
+ R.getORE()->emit([&]() {
+ return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
+ << "Cannot SLP vectorize list: vectorization factor "
+ << "less than 2 is not supported";
+ });
+ return false;
+ }
+
bool Changed = false;
bool CandidateFound = false;
int MinCost = SLPCostThreshold;
--- /dev/null
+; RUN: opt < %s -slp-vectorizer -S -pass-remarks-missed=slp-vectorizer 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test check that slp vectorizer is not trying to vectorize instructions already vectorized.
+; CHECK: remark: <unknown>:0:0: Cannot SLP vectorize list: type <16 x i8> is unsupported by vectorizer
+
+define void @vector() {
+ %load0 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 1)
+ %load1 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 2)
+ %add = add <16 x i8> %load1, %load0
+ tail call void @vector.store(<16 x i8> %add, <16 x i8>* undef, i32 1)
+ ret void
+}
+
+declare <16 x i8> @vector.load(<16 x i8>*, i32)
+declare void @vector.store(<16 x i8>, <16 x i8>*, i32)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test check that we are not crashing or changing the code.
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT: [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 16 x i8> [[LOAD1]], [[LOAD0]]
+; CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[ADD]], <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT: ret void
+;
+ %load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+ %load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+ %add = add <vscale x 16 x i8> %load1, %load0
+ tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %add, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+ ret void
+}
+
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)