From 9255124a0713f1fe57e553c4266380a7087a61c6 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 30 Mar 2023 07:36:12 -0700 Subject: [PATCH] [SLP]Fix a crash when trying to shuffle multiple nodes. The mask must be transformed after applying the shuffle, using the mask itself as the base, so that the indices actually used in the previous shuffle are correctly marked as identity. This fixes a crash when vectors of different sizes are shuffled. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 +- .../SLPVectorizer/X86/shuffle-multiple-nodes.ll | 63 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/shuffle-multiple-nodes.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1d5da9a..c763f67 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9142,11 +9142,11 @@ public: Value *Vec = InVectors.front(); if (InVectors.size() == 2) { Vec = createShuffle(Vec, InVectors.back(), CommonMask); - transformMaskAfterShuffle(CommonMask, Mask); + transformMaskAfterShuffle(CommonMask, CommonMask); } else if (cast(Vec->getType())->getNumElements() != Mask.size()) { Vec = createShuffle(Vec, nullptr, CommonMask); - transformMaskAfterShuffle(CommonMask, Mask); + transformMaskAfterShuffle(CommonMask, CommonMask); } V1 = createShuffle(V1, V2, Mask); for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multiple-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multiple-nodes.ll new file mode 100644 index 0000000..370dc46 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multiple-nodes.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | 
FileCheck %s + +define i32 @main(<16 x i32> %bc47.i, <16 x i32> %bc) { +; CHECK-LABEL: define i32 @main +; CHECK-SAME: (<16 x i32> [[BC47_I:%.*]], <16 x i32> [[BC:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i32> [[BC]], <16 x i32> [[BC47_I]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: store <8 x i32> [[TMP7]], ptr null, align 16 +; CHECK-NEXT: ret i32 0 +; +entry: + %0 = extractelement <16 x i32> %bc47.i, i64 0 + %1 = extractelement <16 x i32> %bc, i64 0 + %2 = extractelement <16 x i32> , i64 0 + %3 = mul i32 %1, %2 + %4 = mul i32 0, 0 + %5 = sub i32 %3, %4 + %6 = mul i32 0, 0 + %7 = mul i32 0, %1 + %8 = add i32 %6, %7 + store i32 %5, ptr null, align 16 + store i32 %8, ptr getelementptr inbounds ([9 x i32], ptr null, i64 0, i64 1), align 2 + %9 = extractelement <16 x i32> zeroinitializer, i64 0 + %10 = mul i32 %9, 0 + %11 = extractelement <16 x i32> zeroinitializer, i64 0 + %12 = mul i32 0, 0 + %13 = sub i32 %10, %12 + %14 = mul i32 0, %0 + %15 = mul i32 0, 0 + %16 = add i32 %14, %15 + store i32 %13, ptr getelementptr inbounds ([9 x i32], ptr null, i64 0, i64 2), align 4 + store i32 %16, ptr getelementptr inbounds ([9 x i32], ptr null, i64 0, i64 3), align 2 + %17 = extractelement <16 x i32> , i64 0 + %18 = mul i32 %17, 0 + %19 = mul i32 0, 0 + %20 = sub i32 %18, %19 + %21 = extractelement <16 x i32> zeroinitializer, i64 1 + %22 = mul i32 %21, %0 + %23 = mul i32 0, %17 + %24 = add i32 
%22, %23 + store i32 %20, ptr getelementptr inbounds ([9 x i32], ptr null, i64 0, i64 4), align 8 + store i32 %24, ptr getelementptr inbounds ([9 x i32], ptr null, i64 0, i64 5), align 2 + %25 = mul i32 0, %11 + %26 = extractelement <16 x i32> zeroinitializer, i64 0 + %27 = mul i32 0, 0 + %28 = sub i32 %25, %27 + %29 = mul i32 0, 0 + %30 = mul i32 %26, 0 + %31 = add i32 %29, %30 + store i32 %28, ptr getelementptr inbounds ([9 x i32], ptr null, i64 0, i64 6), align 4 + store i32 %31, ptr getelementptr inbounds ([9 x i32], ptr null, i64 0, i64 7), align 2 + ret i32 0 +} + + -- 2.7.4