From 9f5d783d467523d6218a63883b45192c8a875c96 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 2 Jun 2021 22:05:14 +0100 Subject: [PATCH] [X86][SSE] combineScalarToVector - only reuse broadcasts for scalar_to_vector if the source operands scalar types match We were hitting an issue when the scalar_to_vector source was being implicitly truncated (in this case from i8 to vXi1) but we were also using the i8 source in a broadcast to a vXi8 value. Fixes PR50374 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 24 ++++++++++++++---------- llvm/test/CodeGen/X86/pr50374.ll | 31 +++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr50374.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1fcb52a..a658287 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -50473,16 +50473,20 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) { // See if we're broadcasting the scalar value, in which case just reuse that. // Ensure the same SDValue from the SDNode use is being used. - for (SDNode *User : Src->uses()) - if (User->getOpcode() == X86ISD::VBROADCAST && Src == User->getOperand(0)) { - unsigned SizeInBits = VT.getFixedSizeInBits(); - unsigned BroadcastSizeInBits = User->getValueSizeInBits(0).getFixedSize(); - if (BroadcastSizeInBits == SizeInBits) - return SDValue(User, 0); - if (BroadcastSizeInBits > SizeInBits) - return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits); - // TODO: Handle BroadcastSizeInBits < SizeInBits when we have test coverage. 
- } + if (VT.getScalarType() == Src.getValueType()) + for (SDNode *User : Src->uses()) + if (User->getOpcode() == X86ISD::VBROADCAST && + Src == User->getOperand(0)) { + unsigned SizeInBits = VT.getFixedSizeInBits(); + unsigned BroadcastSizeInBits = + User->getValueSizeInBits(0).getFixedSize(); + if (BroadcastSizeInBits == SizeInBits) + return SDValue(User, 0); + if (BroadcastSizeInBits > SizeInBits) + return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits); + // TODO: Handle BroadcastSizeInBits < SizeInBits when we have test + // coverage. + } return SDValue(); } diff --git a/llvm/test/CodeGen/X86/pr50374.ll b/llvm/test/CodeGen/X86/pr50374.ll new file mode 100644 index 0000000..fe1286d --- /dev/null +++ b/llvm/test/CodeGen/X86/pr50374.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s + +define void @PR50374() { +; CHECK-LABEL: PR50374: +; CHECK: # %bb.0: # %while.84.body.preheader +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %vector.body1999 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_1 +while.84.body.preheader: + %0 = load i8, i8* undef, align 1 + %1 = load i8, i8* undef, align 4 + %.not14.2.2 = icmp eq i8 0, 0 + %2 = and i8 %0, 1 + %.not14.1.2 = icmp eq i8 %2, 0 + %3 = select i1 %.not14.2.2, i1 %.not14.1.2, i1 false + %.not14.2361 = icmp eq i8 0, 0 + %4 = select i1 %3, i1 %.not14.2361, i1 false + %add.10961.i.2.2 = select reassoc nsz contract i1 %4, float 0.000000e+00, float 0x7FF8000000000000 + %broadcast.splatinsert2024 = insertelement <8 x float> poison, float %add.10961.i.2.2, i32 0 + %broadcast.splat2025 = shufflevector <8 x float> %broadcast.splatinsert2024, <8 x float> poison, <8 x i32> zeroinitializer + %broadcast.splatinsert2049 = insertelement <8 x i8> poison, i8 %1, i32 0 + %broadcast.splat2050 = shufflevector <8 x i8> %broadcast.splatinsert2049, <8 x i8> poison, <8 x i32> 
zeroinitializer + br label %vector.body1999 + +vector.body1999: ; preds = %vector.body1999, %while.84.body.preheader + %predphi2026 = select <8 x i1> undef, <8 x float> undef, <8 x float> %broadcast.splat2025 + %predphi2051 = select <8 x i1> undef, <8 x i8> %broadcast.splat2050, <8 x i8> undef + br label %vector.body1999 +} -- 2.7.4