From cf792f664a75ea2b45962e81ea4487fc44b00232 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 27 Apr 2023 09:00:21 -0700 Subject: [PATCH] [SLP]Fix a crash for the replaced vectorized value. If two nodes share the same value, which is replaced in one of the nodes, we need to automatically replace the same value in all nodes. Better to use WeakTrackingVH for this to fix the compiler crash. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- .../SLPVectorizer/X86/postponed_gathers.ll | 75 ++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2065f41..5e7ac5f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2600,7 +2600,7 @@ private: ValueList Scalars; /// The Scalars are vectorized into this value. It is initialized to Null. 
- Value *VectorizedValue = nullptr; + WeakTrackingVH VectorizedValue = nullptr; /// Do we need to gather this sequence or vectorize it /// (either with vector instruction or with scatter/gather diff --git a/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll b/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll new file mode 100644 index 0000000..681d131 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-10 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s + +define void @foo() { +; CHECK-LABEL: define void @foo() { +; CHECK-NEXT: bci_0: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) null, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> , i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[BCI_252:%.*]] +; CHECK: bci_252: +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[BCI_0:%.*]] ], [ [[TMP16:%.*]], [[BCI_252_1:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i32> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: br i1 false, label [[NOT_ZERO70:%.*]], label [[BCI_252_1]] +; CHECK: bci_252.1: +; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <2 x i32> [[TMP2]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or 
<2 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <2 x i32> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP16]] = or <2 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: br label [[BCI_252]] +; CHECK: not_zero70: +; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP9]], [[BCI_252]] ] +; CHECK-NEXT: ret void +; +bci_0: + %0 = load i32, ptr addrspace(1) null, align 8 + br label %bci_252 + +bci_252: + %1 = phi i32 [ 0, %bci_0 ], [ %20, %bci_252.1 ] + %2 = phi i32 [ 0, %bci_0 ], [ %15, %bci_252.1 ] + %3 = mul i32 %0, 0 + %4 = or i32 %0, %3 + %5 = or i32 %4, 0 + %.neg91.neg = or i32 %2, 0 + %.neg446 = or i32 %.neg91.neg, %5 + %6 = or i32 %.neg446, 0 + %7 = mul i32 0, 0 + %8 = or i32 %0, %7 + %9 = or i32 %8, 0 + %.neg91.1.neg = or i32 %1, 0 + %.neg448 = or i32 %.neg91.1.neg, %9 + %10 = or i32 %.neg448, 0 + br i1 false, label %not_zero70, label %bci_252.1 + +bci_252.1: + %11 = or i32 %0, 0 + %12 = mul i32 %11, 0 + %13 = or i32 %0, %12 + %14 = or i32 %13, 0 + %.neg91.neg.1 = or i32 %6, 0 + %.neg446.1 = or i32 %.neg91.neg.1, %14 + %15 = or i32 %.neg446.1, 0 + %16 = or i32 %0, 0 + %17 = mul i32 %16, 0 + %18 = or i32 %0, %17 + %19 = or i32 %18, 0 + %.neg91.1.neg.1 = or i32 %10, 0 + %.neg448.1 = or i32 %.neg91.1.neg.1, %19 + %20 = or i32 %.neg448.1, 0 + br label %bci_252 + +not_zero70: + %.lcssa546 = phi i32 [ %6, %bci_252 ] + %.lcssa545 = phi i32 [ %10, %bci_252 ] + ret void +} -- 2.7.4