From b5d91ab73e8c2702bddc5994538129ba267ed5fe Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Tue, 8 Nov 2022 06:16:30 -0800
Subject: [PATCH] [SLP]Fix PR58863: Mask index beyond mask size for non-power-2 insertelement analysis.

Need to check if the insertelement mask size is reached during cost
analysis to avoid compiler crash.

Differential Revision: https://reviews.llvm.org/D137639
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp       |  3 ++-
 .../SLPVectorizer/slp-non-pow-2-insertelement.ll      | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/slp-non-pow-2-insertelement.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1449cec..2f0a93a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6702,7 +6702,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
           if (Mask[I] != UndefMaskElem)
             Mask[I] = I + VecSz;
         for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
-          Mask[I] = InMask.test(I) ? UndefMaskElem : I;
+          Mask[I] =
+              ((I >= InMask.size()) || InMask.test(I)) ? UndefMaskElem : I;
         Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
       }
     }
diff --git a/llvm/test/Transforms/SLPVectorizer/slp-non-pow-2-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/slp-non-pow-2-insertelement.ll
new file mode 100644
index 0000000..379022d
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/slp-non-pow-2-insertelement.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer < %s | FileCheck %s
+
+define void @PR58863() {
+; CHECK-LABEL: @PR58863(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL_I:%.*]] = fmul float poison, poison
+; CHECK-NEXT:    [[MUL11_I:%.*]] = fmul float poison, poison
+; CHECK-NEXT:    [[I:%.*]] = insertelement <3 x float> , float [[MUL_I]], i64 0
+; CHECK-NEXT:    [[I1:%.*]] = insertelement <3 x float> [[I]], float [[MUL11_I]], i64 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %mul.i = fmul float poison, poison
+  %mul11.i = fmul float poison, poison
+  %i = insertelement <3 x float> , float %mul.i, i64 0
+  %i1 = insertelement <3 x float> %i, float %mul11.i, i64 2
+  ret void
+}
-- 
2.7.4