From 3405237f77111946db90d986f2b5d9b9f8cae8a1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 30 Sep 2019 18:43:44 +0000 Subject: [PATCH] [X86] Mask off upper bits of splat element in LowerBUILD_VECTORvXi1 when forming a SELECT. The i1 scalar would have been type legalized to i8, but that doesn't guarantee anything about the upper bits. If we're going to use it as a condition we need to make sure the upper bits are 0. I've special cased ISD::SETCC conditions since that should guarantee zero upper bits. We could go further and use computeKnownBits, but we have no tests that would need that. Fixes PR43507. llvm-svn: 373246 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++++++-- llvm/test/CodeGen/X86/avx512-calling-conv.ll | 8 ++++---- llvm/test/CodeGen/X86/pr43507.ll | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr43507.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4dc8027..c479429 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8459,10 +8459,20 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, } // for splat use " (select i1 splat_elt, all-ones, all-zeroes)" - if (IsSplat) - return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx), + if (IsSplat) { + // The build_vector allows the scalar element to be larger than the vector + // element type. We need to mask it to use as a condition unless we know + // the upper bits are zero. + // FIXME: Use computeKnownBits instead of checking specific opcode? 
+ SDValue Cond = Op.getOperand(SplatIdx); + assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!"); + if (Cond.getOpcode() != ISD::SETCC) + Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond, + DAG.getConstant(1, dl, MVT::i8)); + return DAG.getSelect(dl, VT, Cond, DAG.getConstant(1, dl, VT), DAG.getConstant(0, dl, VT)); + } // insert elements one by one SDValue DstVec; diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll index 8901cee..5fb114b3 100644 --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -729,12 +729,12 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: korw %k2, %k0, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: xorl %ecx, %ecx -; KNL-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) ; KNL-NEXT: movl $65535, %edx ## imm = 0xFFFF ; KNL-NEXT: movl $0, %esi ; KNL-NEXT: cmovnel %edx, %esi ; KNL-NEXT: kmovw %esi, %k1 -; KNL-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) ; KNL-NEXT: cmovnel %edx, %ecx ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kandw %k1, %k2, %k1 @@ -1314,11 +1314,11 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 ; KNL_X32-NEXT: xorl %eax, %eax -; KNL_X32-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: movl $65535, %ecx ## imm = 0xFFFF ; KNL_X32-NEXT: movl $0, %edx ; KNL_X32-NEXT: cmovnel %ecx, %edx -; KNL_X32-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: cmovnel %ecx, %eax ; KNL_X32-NEXT: kandw %k0, %k1, %k0 ; KNL_X32-NEXT: kmovw %edx, %k1 diff --git a/llvm/test/CodeGen/X86/pr43507.ll b/llvm/test/CodeGen/X86/pr43507.ll new file mode 100644 index 0000000..ec18d3c --- /dev/null +++ b/llvm/test/CodeGen/X86/pr43507.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s + +define <8 x i1> @ham(i64 %arg) { +; CHECK-LABEL: ham: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: movl $255, %ecx +; CHECK-NEXT: cmovel %eax, %ecx +; CHECK-NEXT: kmovd %ecx, %k0 +; CHECK-NEXT: vpmovm2w %k0, %xmm0 +; CHECK-NEXT: retq + %tmp = trunc i64 %arg to i1 + %tmp1 = insertelement <8 x i1> undef, i1 %tmp, i32 0 + %tmp2 = shufflevector <8 x i1> %tmp1, <8 x i1> undef, <8 x i32> zeroinitializer + ret <8 x i1> %tmp2 +} -- 2.7.4