void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ void loadVectorConstant(const SystemZVectorConstantInfo &VCI,
+ SDNode *Node);
+
// Try to use gather instruction Opcode to implement vector insertion N.
bool tryGather(SDNode *N, unsigned Opcode);
SelectCode(Or.getNode());
}
+// Materialize the vector constant described by VCI and replace Node with the
+// result.  VCI must already have been validated with isVectorConstantLegal().
+void SystemZDAGToDAGISel::loadVectorConstant(
+ const SystemZVectorConstantInfo &VCI, SDNode *Node) {
+ assert((VCI.Opcode == SystemZISD::BYTE_MASK ||
+ VCI.Opcode == SystemZISD::REPLICATE ||
+ VCI.Opcode == SystemZISD::ROTATE_MASK) &&
+ "Bad opcode!");
+ assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type");
+ EVT VT = Node->getValueType(0);
+ SDLoc DL(Node);
+ SmallVector<SDValue, 2> Ops;
+ // Immediate operands of the generating node (mask / replicated value /
+ // rotate-mask start+end, depending on VCI.Opcode).
+ for (unsigned OpVal : VCI.OpVals)
+ Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
+ SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
+
+ if (VCI.VecVT == VT.getSimpleVT())
+ // Generated node already has the requested type: use it directly.
+ ReplaceNode(Node, Op.getNode());
+ else if (VT.getSizeInBits() == 128) {
+ // Same 128-bit width but different element type: bitcast, and select the
+ // bitcast node as well.
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op);
+ ReplaceNode(Node, BitCast.getNode());
+ SelectCode(BitCast.getNode());
+ } else { // float or double
+ // Scalar FP constant: the value lives in the high part of the vector
+ // register, so extract subreg_h32 (f32) or subreg_h64 (f64).
+ unsigned SubRegIdx =
+ (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64);
+ ReplaceNode(
+ Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode());
+ }
+ // Finally select the generating SystemZISD node itself.
+ SelectCode(Op.getNode());
+}
+
bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
SDValue ElemV = N->getOperand(2);
auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
case ISD::BUILD_VECTOR: {
auto *BVN = cast<BuildVectorSDNode>(Node);
- SDLoc DL(Node);
- EVT VT = Node->getValueType(0);
- uint64_t Mask = 0;
- if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) {
- SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT,
- CurDAG->getTargetConstant(Mask, DL, MVT::i32));
- ReplaceNode(Node, Res);
+ SystemZVectorConstantInfo VCI(BVN);
+ if (VCI.isVectorConstantLegal(*Subtarget)) {
+ loadVectorConstant(VCI, Node);
return;
}
break;
APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF();
if (Imm.isZero() || Imm.isNegZero())
break;
- const SystemZInstrInfo *TII = getInstrInfo();
- EVT VT = Node->getValueType(0);
- unsigned Start, End;
- unsigned BitWidth = VT.getSizeInBits();
- bool Success = SystemZTargetLowering::analyzeFPImm(Imm, BitWidth, Start,
- End, static_cast<const SystemZInstrInfo *>(TII)); (void)Success;
+ SystemZVectorConstantInfo VCI(Imm);
+ bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success;
assert(Success && "Expected legal FP immediate");
- SDLoc DL(Node);
- unsigned Opcode = (BitWidth == 32 ? SystemZ::VGMF : SystemZ::VGMG);
- SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT,
- CurDAG->getTargetConstant(Start, DL, MVT::i32),
- CurDAG->getTargetConstant(End, DL, MVT::i32));
- unsigned SubRegIdx = (BitWidth == 32 ? SystemZ::subreg_h32
- : SystemZ::subreg_h64);
- Res = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SDValue(Res, 0))
- .getNode();
- ReplaceNode(Node, Res);
+ loadVectorConstant(VCI, Node);
return;
}
return false;
}
+// Return true if the constant can be generated with a vector instruction,
+// such as VGBM (byte mask), VGM (generate mask) or VREPI (replicate
+// immediate).  On success, Opcode/VecVT/OpVals describe the node to build.
+bool SystemZVectorConstantInfo::isVectorConstantLegal(
+ const SystemZSubtarget &Subtarget) {
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ if (!Subtarget.hasVector() ||
+ (isFP128 && !Subtarget.hasVectorEnhancements1()))
+ return false;
-// Return true if Imm can be generated with a vector instruction, such as VGM.
-bool SystemZTargetLowering::
-analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start,
- unsigned &End, const SystemZInstrInfo *TII) {
- APInt IntImm = Imm.bitcastToAPInt();
- if (IntImm.getActiveBits() > 64)
+ // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
+ // preferred way of creating all-zero and all-one vectors so give it
+ // priority over other methods below.
+ unsigned Mask = 0;
+ unsigned I = 0;
+ // Every byte of IntBits must be 0x00 or 0xff; each 0xff byte sets the
+ // corresponding bit of the VGBM mask.
+ for (; I < SystemZ::VectorBytes; ++I) {
+ uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
+ if (Byte == 0xff)
+ Mask |= 1ULL << I;
+ else if (Byte != 0)
+ break;
+ }
+ if (I == SystemZ::VectorBytes) {
+ Opcode = SystemZISD::BYTE_MASK;
+ OpVals.push_back(Mask);
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
+ return true;
+ }
+
+ // The remaining methods replicate one element, so the splat element must
+ // fit in 64 bits.
+ if (SplatBitSize > 64)
return false;
- // See if this immediate could be generated with VGM.
- bool Success = TII->isRxSBGMask(IntImm.getZExtValue(), BitWidth, Start, End);
- if (!Success)
+ // Attempt VREPI / VGM selection for one candidate splat value.
+ auto tryValue = [&](uint64_t Value) -> bool {
+ // Try VECTOR REPLICATE IMMEDIATE
+ int64_t SignedValue = SignExtend64(Value, SplatBitSize);
+ if (isInt<16>(SignedValue)) {
+ OpVals.push_back(((unsigned) SignedValue));
+ Opcode = SystemZISD::REPLICATE;
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+ SystemZ::VectorBits / SplatBitSize);
+ return true;
+ }
+ // Try VECTOR GENERATE MASK
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
+ // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
+ // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
+ // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
+ OpVals.push_back(Start - (64 - SplatBitSize));
+ OpVals.push_back(End - (64 - SplatBitSize));
+ Opcode = SystemZISD::ROTATE_MASK;
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+ SystemZ::VectorBits / SplatBitSize);
+ return true;
+ }
return false;
- // isRxSBGMask returns the bit numbers for a full 64-bit value,
- // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
- // bit numbers for an BitsPerElement value, so that 0 denotes
- // 1 << (BitsPerElement-1).
- Start -= 64 - BitWidth;
- End -= 64 - BitWidth;
- return true;
+ };
+
+ // First try assuming that any undefined bits above the highest set bit
+ // and below the lowest set bit are 1s. This increases the likelihood of
+ // being able to use a sign-extended element value in VECTOR REPLICATE
+ // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
+ // NOTE(review): this presumably relies on SplatBitsZ != 0 here (all-zero
+ // constants were handled by the byte-mask case above), since
+ // findFirstSet(0)/findLastSet(0) would make the shifts ill-defined —
+ // verify against the isConstantSplat contract.
+ uint64_t SplatBitsZ = SplatBits.getZExtValue();
+ uint64_t SplatUndefZ = SplatUndef.getZExtValue();
+ uint64_t Lower =
+ (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
+ uint64_t Upper =
+ (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+ if (tryValue(SplatBitsZ | Upper | Lower))
+ return true;
+
+ // Now try assuming that any undefined bits between the first and
+ // last defined set bits are set. This increases the chances of
+ // using a non-wraparound mask.
+ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
+ return tryValue(SplatBitsZ | Middle);
+}
+
+// Build splat information for an FP immediate: IntBits is the raw bit
+// pattern zero-extended to the 128-bit vector width, SplatBits is the
+// smallest repeating sub-pattern (at least 8 bits wide).
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
+ IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
+ isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
+
+ // Find the smallest splat by repeatedly halving while both halves match.
+ SplatBits = FPImm.bitcastToAPInt();
+ unsigned Width = SplatBits.getBitWidth();
+ while (Width > 8) {
+ unsigned HalfSize = Width / 2;
+ APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatBits.trunc(HalfSize);
+
+ // If the two halves do not match, stop here.
+ if (HighValue != LowValue || 8 > HalfSize)
+ break;
+
+ SplatBits = HighValue;
+ Width = HalfSize;
+ }
+ // An FP immediate has no undefined bits.
+ SplatUndef = 0;
+ SplatBitSize = Width;
+}
+
+// Build splat information for a constant BUILD_VECTOR node.
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
+ assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
+ bool HasAnyUndefs;
+
+ // Get IntBits by finding the 128 bit splat.
+ BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
+ true);
+
+ // Get SplatBits by finding the 8 bit or greater splat.  This second call
+ // (re)fills SplatUndef/SplatBitSize for the smaller splat element.
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
+ true);
}
bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (Imm.isZero() || Imm.isNegZero())
return true;
- if (!Subtarget.hasVector())
- return false;
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
- unsigned Start, End;
- return analyzeFPImm(Imm, VT.getSizeInBits(), Start, End, TII);
+ // Any other FP immediate is legal iff it can be materialized with a
+ // vector constant-generation instruction (VGBM / VGM / VREPI).
+ return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
}
-// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style
-// mask. Store the mask value in Mask on success.
-bool SystemZTargetLowering::
-tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
- EVT ElemVT = BVN->getValueType(0).getVectorElementType();
- unsigned BytesPerElement = ElemVT.getStoreSize();
- for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
- SDValue Op = BVN->getOperand(I);
- if (!Op.isUndef()) {
- uint64_t Value;
- if (Op.getOpcode() == ISD::Constant)
- Value = cast<ConstantSDNode>(Op)->getZExtValue();
- else if (Op.getOpcode() == ISD::ConstantFP)
- Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
- .getZExtValue());
- else
- return false;
- for (unsigned J = 0; J < BytesPerElement; ++J) {
- uint64_t Byte = (Value >> (J * 8)) & 0xff;
- if (Byte == 0xff)
- Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
- else if (Byte != 0)
- return false;
- }
- }
- }
- return true;
-}
-
-// Try to load a vector constant in which BitsPerElement-bit value Value
-// is replicated to fill the vector. VT is the type of the resulting
-// constant, which may have elements of a different size from BitsPerElement.
-// Return the SDValue of the constant on success, otherwise return
-// an empty value.
-static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
- const SystemZInstrInfo *TII,
- const SDLoc &DL, EVT VT, uint64_t Value,
- unsigned BitsPerElement) {
- // Signed 16-bit values can be replicated using VREPI.
- // Mark the constants as opaque or DAGCombiner will convert back to
- // BUILD_VECTOR.
- int64_t SignedValue = SignExtend64(Value, BitsPerElement);
- if (isInt<16>(SignedValue)) {
- MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
- SystemZ::VectorBits / BitsPerElement);
- SDValue Op = DAG.getNode(
- SystemZISD::REPLICATE, DL, VecVT,
- DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
- }
- // See whether rotating the constant left some N places gives a value that
- // is one less than a power of 2 (i.e. all zeros followed by all ones).
- // If so we can use VGM.
- unsigned Start, End;
- if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
- // isRxSBGMask returns the bit numbers for a full 64-bit value,
- // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
- // bit numbers for an BitsPerElement value, so that 0 denotes
- // 1 << (BitsPerElement-1).
- Start -= 64 - BitsPerElement;
- End -= 64 - BitsPerElement;
- MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
- SystemZ::VectorBits / BitsPerElement);
- SDValue Op = DAG.getNode(
- SystemZISD::ROTATE_MASK, DL, VecVT,
- DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
- DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
- }
- return SDValue();
-}
-
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (BVN->isConstant()) {
- // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
- // preferred way of creating all-zero and all-one vectors so give it
- // priority over other methods below.
- uint64_t Mask;
- if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
- ISD::isBuildVectorAllOnes(Op.getNode()) ||
- (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask)))
+ if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
return Op;
- // Try using some form of replication.
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
- 8, true) &&
- SplatBitSize <= 64) {
- // First try assuming that any undefined bits above the highest set bit
- // and below the lowest set bit are 1s. This increases the likelihood of
- // being able to use a sign-extended element value in VECTOR REPLICATE
- // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
- uint64_t SplatBitsZ = SplatBits.getZExtValue();
- uint64_t SplatUndefZ = SplatUndef.getZExtValue();
- uint64_t Lower = (SplatUndefZ
- & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
- uint64_t Upper = (SplatUndefZ
- & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
- uint64_t Value = SplatBitsZ | Upper | Lower;
- SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
- SplatBitSize);
- if (Op.getNode())
- return Op;
-
- // Now try assuming that any undefined bits between the first and
- // last defined set bits are set. This increases the chances of
- // using a non-wraparound mask.
- uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
- Value = SplatBitsZ | Middle;
- Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
- if (Op.getNode())
- return Op;
- }
-
// Fall back to loading it from memory.
return SDValue();
}
OPCODE(TBEGIN);
OPCODE(TBEGIN_NOFLOAT);
OPCODE(TEND);
+ OPCODE(BYTE_MASK);
OPCODE(ROTATE_MASK);
OPCODE(REPLICATE);
OPCODE(JOIN_DWORDS);
-; Test vector byte masks, v4f32 version. Only all-zero vectors are handled.
+; Test vector byte masks, v4f32 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test an all-zeros vector.
-define <4 x float> @f0() {
-; CHECK-LABEL: f0:
+define <4 x float> @f1() {
+; CHECK-LABEL: f1:
; CHECK: vgbm %v24, 0
; CHECK: br %r14
ret <4 x float> zeroinitializer
}
-; Test that undefs are treated as zero.
-define <4 x float> @f1() {
-; CHECK-LABEL: f1:
-; CHECK: vgbm %v24, 0
+; Test an all-ones vector.
+define <4 x float> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 65535
; CHECK: br %r14
- ret <4 x float> <float zeroinitializer, float undef,
- float zeroinitializer, float undef>
+ ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000,
+ float 0xffffffffe0000000, float 0xffffffffe0000000>
+}
+
+; Test a mixed vector (mask 0xc731).
+define <4 x float> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 50993
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float 0x381fffffe0000000,
+ float 0x379fffe000000000, float 0x371fe00000000000>
+}
+
+; Test that undefs are treated as zero (mask 0xc031).
+define <4 x float> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgbm %v24, 49201
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float undef,
+ float 0x379fffe000000000, float 0x371fe00000000000>
+}
+
+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
+define <4 x float> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vgbm
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float 0x381fffffc0000000,
+ float 0x379fffe000000000, float 0x371fe00000000000>
}
; Test an all-zeros v2f32 that gets promoted to v4f32.
-define <2 x float> @f2() {
-; CHECK-LABEL: f2:
+define <2 x float> @f6() {
+; CHECK-LABEL: f6:
; CHECK: vgbm %v24, 0
; CHECK: br %r14
ret <2 x float> zeroinitializer
}
+
+; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700).
+define <2 x float> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgbm %v24, 50944
+; CHECK: br %r14
+ ret <2 x float> <float 0xffffe00000000000, float 0x381fffffe0000000>
+}