From ac84850ea620df7f799c38f5a4f2c788eabbea78 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 27 Jul 2017 18:15:54 +0000 Subject: [PATCH] [SelectionDAG] Improve DAGTypeLegalizer::convertMask assertion (PR33960) Improve DAGTypeLegalizer::convertMask's isSETCCorConvertedSETCC assertion to properly check for any mixture of SETCC or BUILD_VECTOR of constants, or a logical mask op of them. llvm-svn: 309302 --- .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 21 +++++------- llvm/test/CodeGen/X86/pr33960.ll | 39 ++++++++++++++++++++++ 2 files changed, 48 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr33960.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index b926176..dc844fe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2965,7 +2965,12 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { else if (N.getOpcode() == ISD::SIGN_EXTEND) N = N.getOperand(0); - return (N.getOpcode() == ISD::SETCC); + if (isLogicalMaskOp(N.getOpcode())) + return isSETCCorConvertedSETCC(N.getOperand(0)) && + isSETCCorConvertedSETCC(N.getOperand(1)); + + return (N.getOpcode() == ISD::SETCC || + ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif @@ -2973,28 +2978,20 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { // to ToMaskVT if needed with vector extension or truncation. SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT) { - LLVMContext &Ctx = *DAG.getContext(); - // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled. - unsigned InMaskOpc = InMask->getOpcode(); - // FIXME: This code seems to be too restrictive, we might consider // generalizing it or dropping it. - assert((InMaskOpc == ISD::SETCC || - ISD::isBuildVectorOfConstantSDNodes(InMask.getNode()) || - (isLogicalMaskOp(InMaskOpc) && - isSETCCorConvertedSETCC(InMask->getOperand(0)) && - isSETCCorConvertedSETCC(InMask->getOperand(1)))) && - "Unexpected mask argument."); + assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. SmallVector Ops; for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i) Ops.push_back(InMask->getOperand(i)); - SDValue Mask = DAG.getNode(InMaskOpc, SDLoc(InMask), MaskVT, Ops); + SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. + LLVMContext &Ctx = *DAG.getContext(); unsigned MaskScalarBits = MaskVT.getScalarSizeInBits(); unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits(); if (MaskScalarBits < ToMaskScalBits) { diff --git a/llvm/test/CodeGen/X86/pr33960.ll b/llvm/test/CodeGen/X86/pr33960.ll new file mode 100644 index 0000000..fb9236d --- /dev/null +++ b/llvm/test/CodeGen/X86/pr33960.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 + +@b = external local_unnamed_addr global i32, align 4 + +define void @PR33960() { +; X86-LABEL: PR33960: +; X86: # BB#0: # %entry +; X86-NEXT: movl $0, b +; X86-NEXT: retl +; +; X64-LABEL: PR33960: +; X64: # BB#0: # %entry +; X64-NEXT: movl $0, {{.*}}(%rip) +; X64-NEXT: retq +entry: + %tmp = insertelement <4 x i32> , i32 -2, i32 3 + %predphi26 = insertelement <4 x i32> %tmp, i32 -7, i32 0 + %tmp1 = trunc <4 x i32> %predphi26 to <4 x i16> + %tmp2 = icmp eq <4 x i16> %tmp1, zeroinitializer + %tmp3 = icmp eq <4 x i32> undef, zeroinitializer + %tmp4 = and <4 x i1> %tmp2, %tmp3 + %predphi17 = select <4 x i1> %tmp4, <4 x i32> undef, <4 x i32> zeroinitializer + %tmp5 = shl <4 x i32> %predphi17, + %tmp6 = ashr exact <4 x i32> %tmp5, + %tmp7 = or <4 x i32> %tmp6, undef + %tmp8 = or <4 x i32> undef, %tmp7 + %tmp9 = or <4 x i32> undef, %tmp8 + %tmp10 = or <4 x i32> undef, %tmp9 + %tmp11 = or <4 x i32> undef, %tmp10 + %tmp12 = or <4 x i32> undef, %tmp11 + %bin.rdx = or <4 x i32> %tmp12, undef + %bin.rdx19 = or <4 x i32> %bin.rdx, undef + %tmp13 = extractelement <4 x i32> %bin.rdx19, i32 0 + %or = or i32 0, %tmp13 + store i32 %or, i32* @b, align 4 + ret void +} -- 2.7.4