From f61c9b75690e8c374a99332b9b837bf8f77bfc90 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sun, 19 Feb 2023 21:42:04 +0000
Subject: [PATCH] [SLP] Fix infinite loop in isUndefVector.

This fixes an infinite loop if isa<T>(II->getOperand(1)) is true.
Update Base at the top of the loop, before the continue.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D144292
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp    |  2 +-
 .../SLPVectorizer/AArch64/insertelement.ll         | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 92ca124..0b427e1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -381,9 +381,9 @@ static SmallBitVector isUndefVector(const Value *V,
     if (!UseMask.empty()) {
       const Value *Base = V;
       while (auto *II = dyn_cast<InsertElementInst>(Base)) {
+        Base = II->getOperand(0);
         if (isa<T>(II->getOperand(1)))
           continue;
-        Base = II->getOperand(0);
         std::optional<unsigned> Idx = getInsertIndex(II);
         if (!Idx)
           continue;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
index 284a240..1198bb1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
@@ -35,3 +35,25 @@ define @insertelement-scalable-vector() {
 
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare float @llvm.fabs.f32(float)
+
+
+define <4 x float> @insertelement_poison_lanes(ptr %0) {
+; CHECK-LABEL: @insertelement_poison_lanes(
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
+; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float 0.000000e+00, i64 0
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0:%.*]], i64 1
+; CHECK-NEXT:    store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8
+; CHECK-NEXT:    ret <4 x float> [[INS_2]]
+;
+  %trunc.1 = fptrunc double 0.000000e+00 to float
+  %trunc.2 = fptrunc double 1.000000e+00 to float
+  %ins.1 = insertelement <4 x float> zeroinitializer, float poison, i64 0
+  %ins.2 = insertelement <4 x float> %ins.1, float %trunc.1, i64 0
+  %ext.1 = fpext float %trunc.1 to double
+  %gep.1 = getelementptr double, ptr %0, i64 1
+  store double %ext.1, ptr %gep.1, align 8
+  %ext.2 = fpext float %trunc.2 to double
+  %gep.2 = getelementptr double, ptr %0, i64 2
+  store double %ext.2, ptr %gep.2, align 8
+  ret <4 x float> %ins.2
+}
-- 
2.7.4