From 73250168e723b1507bc15e8fa6670aa5d3f3ac2c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 1 Apr 2017 04:26:20 +0000 Subject: [PATCH] [DAGCombiner] Fix fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) to explicitly ensure that only one of the inputs of each shuffle is a zero vector. This can only happen when we have a mix of zero and undef elements and the two vectors have a different arrangement of zeros/undefs. The shuffle should eventually be constant folded to all zeros. Fixes PR32484. llvm-svn: 299291 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/test/CodeGen/X86/pr32484.ll | 32 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr32484.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9fbd409..d21dde0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4082,7 +4082,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); // Ensure both shuffles have a zero input. 
- if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) { + if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) { assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); diff --git a/llvm/test/CodeGen/X86/pr32484.ll b/llvm/test/CodeGen/X86/pr32484.ll new file mode 100644 index 0000000..74857f8 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr32484.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @foo() { +; CHECK-LABEL: foo: +; CHECK: # BB#0: +; CHECK-NEXT: # implicit-def: %RAX +; CHECK-NEXT: jmpq *%rax +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: # implicit-def: %RAX +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-NEXT: movdqu %xmm1, (%rax) +; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: retq + indirectbr i8* undef, [label %9, label %1] + +;