From da07b3ad420ef31b50a3c3621c9445a2c15fef45 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sat, 17 Dec 2011 01:08:46 +0000 Subject: [PATCH] Make sure that the lower bits on the VSELECT condition are properly set. llvm-svn: 146800 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 52 +++++++++++++++------------ llvm/test/CodeGen/X86/2011-12-15-vec_shift.ll | 15 +++++--- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a7b38a5..0cf0bd9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10168,48 +10168,54 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MUL, dl, VT, Op, R); } if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) { + assert((Subtarget->hasSSE2() || Subtarget->hasAVX()) && + "Need SSE2 for pslli/pcmpeq."); + // a = a << 5; Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), Op.getOperand(1), DAG.getConstant(5, MVT::i32)); - ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15)); - ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63)); + // Turn 'a' into a mask suitable for VSELECT + SDValue VSelM = DAG.getConstant(0x80, VT); + SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op); + OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32), + OpVSel, VSelM); - std::vector CVM1(16, CM1); - std::vector CVM2(16, CM2); - Constant *C = ConstantVector::get(CVM1); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); - SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); + SDValue CM1 = DAG.getConstant(0x0f, VT); + SDValue CM2 = DAG.getConstant(0x3f, VT); - // r = pblendv(r, psllw(r & (char16)15, 4), a); - M = DAG.getNode(ISD::AND, dl, VT, R, M); + // r = 
VSELECT(r, psllw(r & (char16)15, 4), a); + SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1); M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R); + R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R); + // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); + OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op); + OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32), + OpVSel, VSelM); - C = ConstantVector::get(CVM2); - CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); - M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); - - // r = pblendv(r, psllw(r & (char16)63, 2), a); - M = DAG.getNode(ISD::AND, dl, VT, R, M); + // r = VSELECT(r, psllw(r & (char16)63, 2), a); + M = DAG.getNode(ISD::AND, dl, VT, R, CM2); M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R); + R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R); + // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); + OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op); + OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32), + OpVSel, VSelM); - // return pblendv(r, r+r, a); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, + // return VSELECT(r, r+r, a); + R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, DAG.getNode(ISD::ADD, dl, VT, R, R), R); return R; } diff --git a/llvm/test/CodeGen/X86/2011-12-15-vec_shift.ll b/llvm/test/CodeGen/X86/2011-12-15-vec_shift.ll index 2b98b5a..6f9188c 100644 --- a/llvm/test/CodeGen/X86/2011-12-15-vec_shift.ll +++ b/llvm/test/CodeGen/X86/2011-12-15-vec_shift.ll @@ -1,12 +1,19 @@ -; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s +; 
RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4 +; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4 ; Test case for r146671 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.7" define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind { - ; CHECK: psllw $4, [[REG:%xmm.]] - ; CHECK-NEXT: movdqa - ; CHECK-NEXT: pblendvb [[REG]],{{ %xmm.}} + ; Make sure operands to pblend are in the right order. + ; CHECK-W-SSE4: psllw $4, [[REG1:%xmm.]] + ; CHECK-W-SSE4: pblendvb [[REG1]],{{ %xmm.}} + ; CHECK-W-SSE4: psllw $2 + + ; Make sure we're masking and pcmp'ing the VSELECT condition vector. + ; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]] + ; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]] + ; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]] %1 = shl <16 x i8> %a, %b ret <16 x i8> %1 } -- 2.7.4