void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ void loadVectorConstant(const SystemZVectorConstantInfo &VCI,
+ SDNode *Node);
+
// Try to use gather instruction Opcode to implement vector insertion N.
bool tryGather(SDNode *N, unsigned Opcode);
SelectCode(Or.getNode());
}
+// Materialize the vector constant described by VCI and replace Node with the
+// result.  VCI must already have been validated with isVectorConstantLegal().
+void SystemZDAGToDAGISel::loadVectorConstant(
+ const SystemZVectorConstantInfo &VCI, SDNode *Node) {
+ assert((VCI.Opcode == SystemZISD::BYTE_MASK ||
+ VCI.Opcode == SystemZISD::REPLICATE ||
+ VCI.Opcode == SystemZISD::ROTATE_MASK) &&
+ "Bad opcode!");
+ assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type");
+ EVT VT = Node->getValueType(0);
+ SDLoc DL(Node);
+ SmallVector<SDValue, 2> Ops;
+ // Immediate operands of the generating node (mask / replicated value /
+ // rotate-mask start+end, depending on VCI.Opcode).
+ for (unsigned OpVal : VCI.OpVals)
+ Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
+ SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
+
+ if (VCI.VecVT == VT.getSimpleVT())
+ // Generated node already has the requested type: use it directly.
+ ReplaceNode(Node, Op.getNode());
+ else if (VT.getSizeInBits() == 128) {
+ // Same 128-bit width but different element type: bitcast, and select the
+ // bitcast node as well.
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op);
+ ReplaceNode(Node, BitCast.getNode());
+ SelectCode(BitCast.getNode());
+ } else { // float or double
+ // Scalar FP constant: the value lives in the high part of the vector
+ // register, so extract subreg_h32 (f32) or subreg_h64 (f64).
+ unsigned SubRegIdx =
+ (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64);
+ ReplaceNode(
+ Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode());
+ }
+ // Finally select the generating SystemZISD node itself.
+ SelectCode(Op.getNode());
+}
+
bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
SDValue ElemV = N->getOperand(2);
auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
case ISD::BUILD_VECTOR: {
auto *BVN = cast<BuildVectorSDNode>(Node);
- SDLoc DL(Node);
- EVT VT = Node->getValueType(0);
- uint64_t Mask = 0;
- if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) {
- SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT,
- CurDAG->getTargetConstant(Mask, DL, MVT::i32));
- ReplaceNode(Node, Res);
+ SystemZVectorConstantInfo VCI(BVN);
+ if (VCI.isVectorConstantLegal(*Subtarget)) {
+ loadVectorConstant(VCI, Node);
return;
}
break;
APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF();
if (Imm.isZero() || Imm.isNegZero())
break;
- const SystemZInstrInfo *TII = getInstrInfo();
- EVT VT = Node->getValueType(0);
- unsigned Start, End;
- unsigned BitWidth = VT.getSizeInBits();
- bool Success = SystemZTargetLowering::analyzeFPImm(Imm, BitWidth, Start,
- End, static_cast<const SystemZInstrInfo *>(TII)); (void)Success;
+ SystemZVectorConstantInfo VCI(Imm);
+ bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success;
assert(Success && "Expected legal FP immediate");
- SDLoc DL(Node);
- unsigned Opcode = (BitWidth == 32 ? SystemZ::VGMF : SystemZ::VGMG);
- SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT,
- CurDAG->getTargetConstant(Start, DL, MVT::i32),
- CurDAG->getTargetConstant(End, DL, MVT::i32));
- unsigned SubRegIdx = (BitWidth == 32 ? SystemZ::subreg_h32
- : SystemZ::subreg_h64);
- Res = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SDValue(Res, 0))
- .getNode();
- ReplaceNode(Node, Res);
+ loadVectorConstant(VCI, Node);
return;
}
return false;
}
+// Return true if the constant can be generated with a vector instruction,
+// such as VGBM (byte mask), VGM (generate mask) or VREPI (replicate
+// immediate).  On success, Opcode/VecVT/OpVals describe the node to build.
+bool SystemZVectorConstantInfo::isVectorConstantLegal(
+ const SystemZSubtarget &Subtarget) {
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ if (!Subtarget.hasVector() ||
+ (isFP128 && !Subtarget.hasVectorEnhancements1()))
+ return false;
-// Return true if Imm can be generated with a vector instruction, such as VGM.
-bool SystemZTargetLowering::
-analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start,
- unsigned &End, const SystemZInstrInfo *TII) {
- APInt IntImm = Imm.bitcastToAPInt();
- if (IntImm.getActiveBits() > 64)
+ // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
+ // preferred way of creating all-zero and all-one vectors so give it
+ // priority over other methods below.
+ unsigned Mask = 0;
+ unsigned I = 0;
+ // Every byte of IntBits must be 0x00 or 0xff; each 0xff byte sets the
+ // corresponding bit of the VGBM mask.
+ for (; I < SystemZ::VectorBytes; ++I) {
+ uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
+ if (Byte == 0xff)
+ Mask |= 1ULL << I;
+ else if (Byte != 0)
+ break;
+ }
+ if (I == SystemZ::VectorBytes) {
+ Opcode = SystemZISD::BYTE_MASK;
+ OpVals.push_back(Mask);
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
+ return true;
+ }
+
+ // The remaining methods replicate one element, so the splat element must
+ // fit in 64 bits.
+ if (SplatBitSize > 64)
return false;
- // See if this immediate could be generated with VGM.
- bool Success = TII->isRxSBGMask(IntImm.getZExtValue(), BitWidth, Start, End);
- if (!Success)
+ // Attempt VREPI / VGM selection for one candidate splat value.
+ auto tryValue = [&](uint64_t Value) -> bool {
+ // Try VECTOR REPLICATE IMMEDIATE
+ int64_t SignedValue = SignExtend64(Value, SplatBitSize);
+ if (isInt<16>(SignedValue)) {
+ OpVals.push_back(((unsigned) SignedValue));
+ Opcode = SystemZISD::REPLICATE;
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+ SystemZ::VectorBits / SplatBitSize);
+ return true;
+ }
+ // Try VECTOR GENERATE MASK
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
+ // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
+ // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
+ // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
+ OpVals.push_back(Start - (64 - SplatBitSize));
+ OpVals.push_back(End - (64 - SplatBitSize));
+ Opcode = SystemZISD::ROTATE_MASK;
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+ SystemZ::VectorBits / SplatBitSize);
+ return true;
+ }
return false;
- // isRxSBGMask returns the bit numbers for a full 64-bit value,
- // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
- // bit numbers for an BitsPerElement value, so that 0 denotes
- // 1 << (BitsPerElement-1).
- Start -= 64 - BitWidth;
- End -= 64 - BitWidth;
- return true;
+ };
+
+ // First try assuming that any undefined bits above the highest set bit
+ // and below the lowest set bit are 1s. This increases the likelihood of
+ // being able to use a sign-extended element value in VECTOR REPLICATE
+ // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
+ // NOTE(review): this presumably relies on SplatBitsZ != 0 here (all-zero
+ // constants were handled by the byte-mask case above), since
+ // findFirstSet(0)/findLastSet(0) would make the shifts ill-defined —
+ // verify against the isConstantSplat contract.
+ uint64_t SplatBitsZ = SplatBits.getZExtValue();
+ uint64_t SplatUndefZ = SplatUndef.getZExtValue();
+ uint64_t Lower =
+ (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
+ uint64_t Upper =
+ (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+ if (tryValue(SplatBitsZ | Upper | Lower))
+ return true;
+
+ // Now try assuming that any undefined bits between the first and
+ // last defined set bits are set. This increases the chances of
+ // using a non-wraparound mask.
+ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
+ return tryValue(SplatBitsZ | Middle);
+}
+
+// Build splat information for an FP immediate: IntBits is the raw bit
+// pattern zero-extended to the 128-bit vector width, SplatBits is the
+// smallest repeating sub-pattern (at least 8 bits wide).
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
+ IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
+ isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
+
+ // Find the smallest splat by repeatedly halving while both halves match.
+ SplatBits = FPImm.bitcastToAPInt();
+ unsigned Width = SplatBits.getBitWidth();
+ while (Width > 8) {
+ unsigned HalfSize = Width / 2;
+ APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatBits.trunc(HalfSize);
+
+ // If the two halves do not match, stop here.
+ if (HighValue != LowValue || 8 > HalfSize)
+ break;
+
+ SplatBits = HighValue;
+ Width = HalfSize;
+ }
+ // An FP immediate has no undefined bits.
+ SplatUndef = 0;
+ SplatBitSize = Width;
+}
+
+// Build splat information for a constant BUILD_VECTOR node.
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
+ assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
+ bool HasAnyUndefs;
+
+ // Get IntBits by finding the 128 bit splat.
+ BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
+ true);
+
+ // Get SplatBits by finding the 8 bit or greater splat.  This second call
+ // (re)fills SplatUndef/SplatBitSize for the smaller splat element.
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
+ true);
}
bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (Imm.isZero() || Imm.isNegZero())
return true;
- if (!Subtarget.hasVector())
- return false;
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
- unsigned Start, End;
- return analyzeFPImm(Imm, VT.getSizeInBits(), Start, End, TII);
+ // Any other FP immediate is legal iff it can be materialized with a
+ // vector constant-generation instruction (VGBM / VGM / VREPI).
+ return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
}
-// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style
-// mask. Store the mask value in Mask on success.
-bool SystemZTargetLowering::
-tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
- EVT ElemVT = BVN->getValueType(0).getVectorElementType();
- unsigned BytesPerElement = ElemVT.getStoreSize();
- for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
- SDValue Op = BVN->getOperand(I);
- if (!Op.isUndef()) {
- uint64_t Value;
- if (Op.getOpcode() == ISD::Constant)
- Value = cast<ConstantSDNode>(Op)->getZExtValue();
- else if (Op.getOpcode() == ISD::ConstantFP)
- Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
- .getZExtValue());
- else
- return false;
- for (unsigned J = 0; J < BytesPerElement; ++J) {
- uint64_t Byte = (Value >> (J * 8)) & 0xff;
- if (Byte == 0xff)
- Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
- else if (Byte != 0)
- return false;
- }
- }
- }
- return true;
-}
-
-// Try to load a vector constant in which BitsPerElement-bit value Value
-// is replicated to fill the vector. VT is the type of the resulting
-// constant, which may have elements of a different size from BitsPerElement.
-// Return the SDValue of the constant on success, otherwise return
-// an empty value.
-static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
- const SystemZInstrInfo *TII,
- const SDLoc &DL, EVT VT, uint64_t Value,
- unsigned BitsPerElement) {
- // Signed 16-bit values can be replicated using VREPI.
- // Mark the constants as opaque or DAGCombiner will convert back to
- // BUILD_VECTOR.
- int64_t SignedValue = SignExtend64(Value, BitsPerElement);
- if (isInt<16>(SignedValue)) {
- MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
- SystemZ::VectorBits / BitsPerElement);
- SDValue Op = DAG.getNode(
- SystemZISD::REPLICATE, DL, VecVT,
- DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
- }
- // See whether rotating the constant left some N places gives a value that
- // is one less than a power of 2 (i.e. all zeros followed by all ones).
- // If so we can use VGM.
- unsigned Start, End;
- if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
- // isRxSBGMask returns the bit numbers for a full 64-bit value,
- // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
- // bit numbers for an BitsPerElement value, so that 0 denotes
- // 1 << (BitsPerElement-1).
- Start -= 64 - BitsPerElement;
- End -= 64 - BitsPerElement;
- MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
- SystemZ::VectorBits / BitsPerElement);
- SDValue Op = DAG.getNode(
- SystemZISD::ROTATE_MASK, DL, VecVT,
- DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
- DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
- }
- return SDValue();
-}
-
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (BVN->isConstant()) {
- // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
- // preferred way of creating all-zero and all-one vectors so give it
- // priority over other methods below.
- uint64_t Mask;
- if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
- ISD::isBuildVectorAllOnes(Op.getNode()) ||
- (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask)))
+ if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
return Op;
- // Try using some form of replication.
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
- 8, true) &&
- SplatBitSize <= 64) {
- // First try assuming that any undefined bits above the highest set bit
- // and below the lowest set bit are 1s. This increases the likelihood of
- // being able to use a sign-extended element value in VECTOR REPLICATE
- // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
- uint64_t SplatBitsZ = SplatBits.getZExtValue();
- uint64_t SplatUndefZ = SplatUndef.getZExtValue();
- uint64_t Lower = (SplatUndefZ
- & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
- uint64_t Upper = (SplatUndefZ
- & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
- uint64_t Value = SplatBitsZ | Upper | Lower;
- SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
- SplatBitSize);
- if (Op.getNode())
- return Op;
-
- // Now try assuming that any undefined bits between the first and
- // last defined set bits are set. This increases the chances of
- // using a non-wraparound mask.
- uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
- Value = SplatBitsZ | Middle;
- Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
- if (Op.getNode())
- return Op;
- }
-
// Fall back to loading it from memory.
return SDValue();
}
OPCODE(TBEGIN);
OPCODE(TBEGIN_NOFLOAT);
OPCODE(TEND);
+ OPCODE(BYTE_MASK);
OPCODE(ROTATE_MASK);
OPCODE(REPLICATE);
OPCODE(JOIN_DWORDS);
-; Test vector byte masks, v4f32 version. Only all-zero vectors are handled.
+; Test vector byte masks, v4f32 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test an all-zeros vector.
-define <4 x float> @f0() {
-; CHECK-LABEL: f0:
+define <4 x float> @f1() {
+; CHECK-LABEL: f1:
; CHECK: vgbm %v24, 0
; CHECK: br %r14
ret <4 x float> zeroinitializer
}
-; Test that undefs are treated as zero.
-define <4 x float> @f1() {
-; CHECK-LABEL: f1:
-; CHECK: vgbm %v24, 0
+; Test an all-ones vector.
+define <4 x float> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 65535
; CHECK: br %r14
- ret <4 x float> <float zeroinitializer, float undef,
- float zeroinitializer, float undef>
+ ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000,
+ float 0xffffffffe0000000, float 0xffffffffe0000000>
+}
+
+; Test a mixed vector (mask 0xc731).
+define <4 x float> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 50993
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float 0x381fffffe0000000,
+ float 0x379fffe000000000, float 0x371fe00000000000>
+}
+
+; Test that undefs are treated as zero (mask 0xc031).
+define <4 x float> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgbm %v24, 49201
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float undef,
+ float 0x379fffe000000000, float 0x371fe00000000000>
+}
+
+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
+define <4 x float> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vgbm
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float 0x381fffffc0000000,
+ float 0x379fffe000000000, float 0x371fe00000000000>
}
; Test an all-zeros v2f32 that gets promoted to v4f32.
-define <2 x float> @f2() {
-; CHECK-LABEL: f2:
+define <2 x float> @f6() {
+; CHECK-LABEL: f6:
; CHECK: vgbm %v24, 0
; CHECK: br %r14
ret <2 x float> zeroinitializer
}
+
+; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700).
+define <2 x float> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgbm %v24, 50944
+; CHECK: br %r14
+ ret <2 x float> <float 0xffffe00000000000, float 0x381fffffe0000000>
+}