From a93407fadfe601f28e0a2df5e7946195f4f414c4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 29 Sep 2018 17:36:22 +0000
Subject: [PATCH] [X86][SSE] LowerScalarImmediateShift - remove 32-bit vXi64
 special case handling.

This is all handled generally by getTargetConstantBitsFromNode now

llvm-svn: 343387
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 194 +++++++++++---------------------
 1 file changed, 65 insertions(+), 129 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0ba6dc3..6a131cd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23515,147 +23515,83 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
   // Optimize shl/srl/sra with constant shift amount.
   APInt UndefElts;
   SmallVector<APInt, 16> EltBits;
-  if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits,
-                                    true, false)) {
-    int SplatIndex = -1;
-    for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
-      if (UndefElts[i])
-        continue;
-      if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex])
-        return SDValue();
-      SplatIndex = i;
-    }
-    if (SplatIndex < 0)
-      return SDValue();
+  if (!getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits,
+                                     true, false))
+    return SDValue();
 
-    uint64_t ShiftAmt = EltBits[SplatIndex].getZExtValue();
-    if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
-      return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
+  int SplatIndex = -1;
+  for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+    if (UndefElts[i])
+      continue;
+    if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex])
+      return SDValue();
+    SplatIndex = i;
+  }
+  if (SplatIndex < 0)
+    return SDValue();
 
-    // i64 SRA needs to be performed as partial shifts.
-    if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
-         (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
-        Op.getOpcode() == ISD::SRA)
-      return ArithmeticShiftRight64(ShiftAmt);
+  uint64_t ShiftAmt = EltBits[SplatIndex].getZExtValue();
+  if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+    return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
 
-    if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
-        VT == MVT::v64i8) {
-      unsigned NumElts = VT.getVectorNumElements();
-      MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
-
-      // Simple i8 add case
-      if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
-        return DAG.getNode(ISD::ADD, dl, VT, R, R);
-
-      // ashr(R, 7) === cmp_slt(R, 0)
-      if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
-        SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
-        if (VT.is512BitVector()) {
-          assert(VT == MVT::v64i8 && "Unexpected element type!");
-          SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
-          return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
-        }
-        return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
-      }
+  // i64 SRA needs to be performed as partial shifts.
+  if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
+       (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
+      Op.getOpcode() == ISD::SRA)
+    return ArithmeticShiftRight64(ShiftAmt);
 
-      // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
-      if (VT == MVT::v16i8 && Subtarget.hasXOP())
-        return SDValue();
+  if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
+      VT == MVT::v64i8) {
+    unsigned NumElts = VT.getVectorNumElements();
+    MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
 
-      if (Op.getOpcode() == ISD::SHL) {
-        // Make a large shift.
-        SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
-                                                 ShiftAmt, DAG);
-        SHL = DAG.getBitcast(VT, SHL);
-        // Zero out the rightmost bits.
-        return DAG.getNode(ISD::AND, dl, VT, SHL,
-                           DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
-      }
-      if (Op.getOpcode() == ISD::SRL) {
-        // Make a large shift.
-        SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
-                                                 ShiftAmt, DAG);
-        SRL = DAG.getBitcast(VT, SRL);
-        // Zero out the leftmost bits.
-        return DAG.getNode(ISD::AND, dl, VT, SRL,
-                           DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
-      }
-      if (Op.getOpcode() == ISD::SRA) {
-        // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
-        SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+    // Simple i8 add case
+    if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
+      return DAG.getNode(ISD::ADD, dl, VT, R, R);
 
-        SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
-        Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
-        Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
-        return Res;
+    // ashr(R, 7) === cmp_slt(R, 0)
+    if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
+      SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+      if (VT.is512BitVector()) {
+        assert(VT == MVT::v64i8 && "Unexpected element type!");
+        SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
+        return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
       }
-      llvm_unreachable("Unknown shift opcode.");
+      return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
     }
-  }
 
-  // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
-  // TODO: Replace constant extraction with getTargetConstantBitsFromNode.
-  if (!Subtarget.hasXOP() &&
-      (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
-       (Subtarget.hasAVX512() && VT == MVT::v8i64))) {
-
-    // AVX1 targets maybe extracting a 128-bit vector from a 256-bit constant.
-    unsigned SubVectorScale = 1;
-    if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
-      SubVectorScale =
-          Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
-      Amt = Amt.getOperand(0);
-    }
-
-    // Peek through any splat that was introduced for i64 shift vectorization.
-    int SplatIndex = -1;
-    if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
-      if (SVN->isSplat()) {
-        SplatIndex = SVN->getSplatIndex();
-        Amt = Amt.getOperand(0);
-        assert(SplatIndex < (int)VT.getVectorNumElements() &&
-               "Splat shuffle referencing second operand");
-      }
-
-    if (Amt.getOpcode() != ISD::BITCAST ||
-        Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+    // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
+    if (VT == MVT::v16i8 && Subtarget.hasXOP())
       return SDValue();
-    Amt = Amt.getOperand(0);
-    unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
-                     (SubVectorScale * VT.getVectorNumElements());
-    unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
-    uint64_t ShiftAmt = 0;
-    unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
-    for (unsigned i = 0; i != Ratio; ++i) {
-      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
-      if (!C)
-        return SDValue();
-      // 6 == Log2(64)
-      ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
-    }
-
-    // Check remaining shift amounts (if not a splat).
-    if (SplatIndex < 0) {
-      for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
-        uint64_t ShAmt = 0;
-        for (unsigned j = 0; j != Ratio; ++j) {
-          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
-          if (!C)
-            return SDValue();
-          // 6 == Log2(64)
-          ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
-        }
-        if (ShAmt != ShiftAmt)
-          return SDValue();
-      }
+    if (Op.getOpcode() == ISD::SHL) {
+      // Make a large shift.
+      SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
+                                               ShiftAmt, DAG);
+      SHL = DAG.getBitcast(VT, SHL);
+      // Zero out the rightmost bits.
+      return DAG.getNode(ISD::AND, dl, VT, SHL,
+                         DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
+    }
+    if (Op.getOpcode() == ISD::SRL) {
+      // Make a large shift.
+      SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
+                                               ShiftAmt, DAG);
+      SRL = DAG.getBitcast(VT, SRL);
+      // Zero out the leftmost bits.
+      return DAG.getNode(ISD::AND, dl, VT, SRL,
+                         DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
+    }
+    if (Op.getOpcode() == ISD::SRA) {
+      // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
+      SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+
+      SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
+      Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+      Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+      return Res;
     }
-
-    if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
-      return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
-
-    if (Op.getOpcode() == ISD::SRA)
-      return ArithmeticShiftRight64(ShiftAmt);
+    llvm_unreachable("Unknown shift opcode.");
   }
 
   return SDValue();
 }
-- 
2.7.4
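Annotation (not part of the upstream commit): the block deleted at the bottom
of the hunk handled 32-bit hosts, where a constant v2i64 shift amount reaches
lowering as a BITCAST of a v4i32 BUILD_VECTOR (possibly wrapped in an
EXTRACT_SUBVECTOR and a splat shuffle) and the 64-bit amount had to be
stitched back together from i32 halves. getTargetConstantBitsFromNode does
that peeking generically, which is why the early-return splat check at the
top of the rewritten function now covers the same cases.

The surviving vXi8 path leans on three scalar identities: shift the bitcast
v8i16 lane and mask off the bits that leak across the byte boundary (SHL/SRL),
and for SRA sign-extend the logical-shift result via xor/sub. Below is a
minimal standalone verifier for those identities, added for illustration
only; the file name is made up, nothing in it is LLVM API, and the reference
ashr assumes the usual two's-complement arithmetic right shift:

  // i8_shift_identities.cpp - exhaustively checks the scalar identities
  // behind the vXi8 lowering kept in LowerScalarImmediateShift above.
  #include <cstdint>
  #include <cstdio>

  int main() {
    for (unsigned s = 0; s < 8; ++s) {
      const uint8_t ShlMask = uint8_t(-1u << s);   // uint8_t(-1U << ShiftAmt)
      const uint8_t SrlMask = uint8_t(0xFFu >> s); // uint8_t(-1U) >> ShiftAmt
      const uint8_t SraMask = uint8_t(128u >> s);  // 128 >> ShiftAmt
      for (unsigned a = 0; a < 256; ++a) {   // low byte of one i16 lane
        for (unsigned b = 0; b < 256; ++b) { // high byte of the same lane
          const uint16_t Lane = uint16_t(a | (b << 8));

          // SHL: shift the whole i16 lane, then AND away the bits that
          // crossed over from the lower byte ("zero out the rightmost bits").
          const uint16_t Shl = uint16_t(Lane << s);
          if (uint8_t(Shl & ShlMask) != uint8_t(a << s) ||
              uint8_t((Shl >> 8) & ShlMask) != uint8_t(b << s)) {
            puts("shl identity failed");
            return 1;
          }

          // SRL: the same trick mirrored ("zero out the leftmost bits").
          const uint16_t Srl = uint16_t(Lane >> s);
          if (uint8_t(Srl & SrlMask) != uint8_t(a >> s) ||
              uint8_t((Srl >> 8) & SrlMask) != uint8_t(b >> s)) {
            puts("srl identity failed");
            return 1;
          }
        }

        // SRA: ashr(x, s) == sub(xor(lshr(x, s), m), m) with m = 128 >> s.
        // After the logical shift the sign bit sits at bit (7 - s); the
        // xor/sub pair sign-extends from exactly that position.
        const uint8_t Lshr = uint8_t(a >> s);
        const uint8_t Sra = uint8_t((Lshr ^ SraMask) - SraMask);
        if (int8_t(Sra) != int8_t(int8_t(a) >> s)) {
          puts("sra identity failed");
          return 1;
        }
      }
    }
    puts("all i8 shift identities hold");
    return 0;
  }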