From 9a7248f56164b44b07df421384c12541a91e6d84 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 17 May 2023 05:41:34 -0700 Subject: [PATCH] [SLP]Fix crash for scalarized vectors. Do not insert the node into InVectors when the resulting vector type is scalarized either; doing so crashes the compiler. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 +-- .../SLPVectorizer/NVPTX/buildvector-scalarized.ll | 37 ++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1fceae5..fd4fee0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7028,10 +7028,8 @@ public: auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size()); // If the resulting type is scalarized, do not adjust the cost. unsigned VecNumParts = TTI.getNumberOfParts(VecTy); - if (VecNumParts == VecTy->getNumElements()) { - InVectors.assign(1, E); + if (VecNumParts == VecTy->getNumElements()) return nullptr; - } DenseMap ExtractVectorsTys; for (auto [I, V] : enumerate(VL)) { // Ignore non-extractelement scalars. 
diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll new file mode 100644 index 0000000..e1c3d9a --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=slp-vectorizer -S -mtriple=nvptx64-unknown-unknown < %s | FileCheck %s + +define <2 x float> @baz() { +; CHECK-LABEL: define <2 x float> @baz() { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <2 x float> zeroinitializer, i64 0 +; CHECK-NEXT: [[FCMP:%.*]] = fcmp uno float [[EXTRACTELEMENT]], 0.000000e+00 +; CHECK-NEXT: [[FCMP1:%.*]] = fcmp uno float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[FCMP]], [[FCMP1]] +; CHECK-NEXT: [[FCMP2:%.*]] = fcmp oeq float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[OR3:%.*]] = or i1 [[FCMP2]], [[OR]] +; CHECK-NEXT: [[FCMP4:%.*]] = fcmp oeq float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[OR5:%.*]] = or i1 [[FCMP4]], [[OR3]] +; CHECK-NEXT: br i1 [[OR5]], label [[BB6:%.*]], label [[BB7:%.*]] +; CHECK: bb6: +; CHECK-NEXT: ret <2 x float> zeroinitializer +; CHECK: bb7: +; CHECK-NEXT: ret <2 x float> zeroinitializer +; +bb: + %extractelement = extractelement <2 x float> zeroinitializer, i64 0 + %fcmp = fcmp uno float %extractelement, 0.000000e+00 + %fcmp1 = fcmp uno float 0.000000e+00, 0.000000e+00 + %or = or i1 %fcmp, %fcmp1 + %fcmp2 = fcmp oeq float 0.000000e+00, 0.000000e+00 + %or3 = or i1 %fcmp2, %or + %fcmp4 = fcmp oeq float 0.000000e+00, 0.000000e+00 + %or5 = or i1 %fcmp4, %or3 + br i1 %or5, label %bb6, label %bb7 + +bb6: ; preds = %bb + ret <2 x float> zeroinitializer + +bb7: ; preds = %bb + ret <2 x float> zeroinitializer +} -- 2.7.4