[SystemZ] Improve handling of ZERO_EXTEND_VECTOR_INREG.

author Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Thu, 26 Mar 2020 11:22:14 +0000 (12:22 +0100)

committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Tue, 30 Jun 2020 07:08:10 +0000 (09:08 +0200)
author Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Thu, 26 Mar 2020 11:22:14 +0000 (12:22 +0100)
committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Tue, 30 Jun 2020 07:08:10 +0000 (09:08 +0200)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

index a753f2a14a35a9a50083ef7ea5e5e47109fac02f..0d71c7fed5d43a511e9d301dec1ea78fa7322c32 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4467,12 +4467,22 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
  }
  
  static bool isZeroVector(SDValue N) {
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
    if (N->getOpcode() == ISD::SPLAT_VECTOR)
      if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
        return Op->getZExtValue() == 0;
    return ISD::isBuildVectorAllZeros(N.getNode());
  }
  
+// Return the index of the first zero vector in Ops, or UINT32_MAX if none.
+static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
+  for (unsigned I = 0; I < Num ; I++)
+    if (isZeroVector(Ops[I]))
+      return I;
+  return UINT32_MAX;
+}
+
  // Bytes is a VPERM-like permute vector, except that -1 is used for
  // undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
  // VSLDB or VPERM.
@@ -4491,9 +4501,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
  
    // Fall back on VPERM.  Construct an SDNode for the permute vector.  Try to
    // eliminate a zero vector by reusing any zero index in the permute vector.
-  unsigned ZeroVecIdx =
-    isZeroVector(Ops[0]) ? 0 : (isZeroVector(Ops[1]) ? 1 : UINT_MAX);
-  if (ZeroVecIdx != UINT_MAX) {
+  unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
+  if (ZeroVecIdx != UINT32_MAX) {
      bool MaskFirst = true;
      int ZeroIdx = -1;
      for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
@@ -4551,10 +4560,13 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
  namespace {
  // Describes a general N-operand vector shuffle.
  struct GeneralShuffle {
-  GeneralShuffle(EVT vt) : VT(vt) {}
+  GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
    void addUndef();
    bool add(SDValue, unsigned);
    SDValue getNode(SelectionDAG &, const SDLoc &);
+  void tryPrepareForUnpack();
+  bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
+  SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
  
    // The operands of the shuffle.
    SmallVector<SDValue, SystemZ::VectorBytes> Ops;
@@ -4566,6 +4578,9 @@ struct GeneralShuffle {
  
    // The type of the shuffle result.
    EVT VT;
+
+  // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
+  unsigned UnpackFromEltSize;
  };
  }
  
@@ -4648,6 +4663,9 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
    if (Ops.size() == 0)
      return DAG.getUNDEF(VT);
  
+  // Use a single unpack if possible as the last operation.
+  tryPrepareForUnpack();
+
    // Make sure that there are at least two shuffle operands.
    if (Ops.size() == 1)
      Ops.push_back(DAG.getUNDEF(MVT::v16i8));
@@ -4713,13 +4731,117 @@ SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
    // to VPERM.
    unsigned OpNo0, OpNo1;
    SDValue Op;
-  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
+  if (unpackWasPrepared() && Ops[1].isUndef())
+    Op = Ops[0];
+  else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
      Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
    else
      Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
+
+  Op = insertUnpackIfPrepared(DAG, DL, Op);
+
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }
  
+#ifndef NDEBUG
+static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
+  dbgs() << Msg.c_str() << " { ";
+  for (unsigned i = 0; i < Bytes.size(); i++)
+    dbgs() << Bytes[i] << " ";
+  dbgs() << "}\n";
+}
+#endif
+
+// If the Bytes vector matches an unpack operation, prepare to do the unpack
+// after all else by removing the zero vector and the effect of the unpack on
+// Bytes.
+void GeneralShuffle::tryPrepareForUnpack() {
+  uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
+  if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
+    return;
+
+  // Only do this if removing the zero vector reduces the depth, otherwise
+  // the critical path will increase with the final unpack.
+  if (Ops.size() > 2 &&
+      Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
+    return;
+
+  // Find an unpack that would allow removing the zero vector from Ops.
+  UnpackFromEltSize = 1;
+  for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
+    bool MatchUnpack = true;
+    SmallVector<int, SystemZ::VectorBytes> SrcBytes;
+    for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
+      unsigned ToEltSize = UnpackFromEltSize * 2;
+      bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
+      if (!IsZextByte)
+        SrcBytes.push_back(Bytes[Elt]);
+      if (Bytes[Elt] != -1) {
+        unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
+        if (IsZextByte != (OpNo == ZeroVecOpNo)) {
+          MatchUnpack = false;
+          break;
+        }
+      }
+    }
+    if (MatchUnpack) {
+      if (Ops.size() == 2) {
+        // Don't use unpack if a single source operand needs rearrangement.
+        for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
+          if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
+            UnpackFromEltSize = UINT_MAX;
+            return;
+          }
+      }
+      break;
+    }
+  }
+  if (UnpackFromEltSize > 4)
+    return;
+
+  LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
+             << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
+             << ".\n";
+             dumpBytes(Bytes, "Original Bytes vector:"););
+
+  // Apply the unpack in reverse to the Bytes array.
+  unsigned B = 0;
+  for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
+    Elt += UnpackFromEltSize;
+    for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
+      Bytes[B] = Bytes[Elt];
+  }
+  while (B < SystemZ::VectorBytes)
+    Bytes[B++] = -1;
+
+  // Remove the zero vector from Ops and renumber Bytes for the later operands.
+  Ops.erase(&Ops[ZeroVecOpNo]);
+  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
+    if (Bytes[I] >= 0) {
+      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
+      if (OpNo > ZeroVecOpNo)
+        Bytes[I] -= SystemZ::VectorBytes;
+    }
+
+  LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
+             dbgs() << "\n";);
+}
+
+SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
+                                               const SDLoc &DL,
+                                               SDValue Op) {
+  if (!unpackWasPrepared())
+    return Op;
+  unsigned InBits = UnpackFromEltSize * 8;
+  EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
+                                SystemZ::VectorBits / InBits);
+  SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
+  unsigned OutBits = InBits * 2;
+  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
+                               SystemZ::VectorBits / OutBits);
+  return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
+}
+
  // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
  static bool isScalarToVector(SDValue Op) {
    for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
@@ -5114,9 +5236,8 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
    return DAG.getNode(ISD::BITCAST, DL, VT, Res);
  }
  
-SDValue
-SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
-                                              unsigned UnpackHigh) const {
+SDValue SystemZTargetLowering::
+lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
    SDValue PackedOp = Op.getOperand(0);
    EVT OutVT = Op.getValueType();
    EVT InVT = PackedOp.getValueType();
@@ -5126,11 +5247,39 @@ SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
      FromBits *= 2;
      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
                                   SystemZ::VectorBits / FromBits);
-    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
+    PackedOp =
+      DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
    } while (FromBits != ToBits);
    return PackedOp;
  }
  
+// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
+SDValue SystemZTargetLowering::
+lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
+  SDValue PackedOp = Op.getOperand(0);
+  SDLoc DL(Op);
+  EVT OutVT = Op.getValueType();
+  EVT InVT = PackedOp.getValueType();
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned OutNumElts = OutVT.getVectorNumElements();
+  unsigned NumInPerOut = InNumElts / OutNumElts;
+
+  SDValue ZeroVec =
+    DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
+
+  SmallVector<int, 16> Mask(InNumElts);
+  unsigned ZeroVecElt = InNumElts;
+  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
+    unsigned MaskElt = PackedElt * NumInPerOut;
+    unsigned End = MaskElt + NumInPerOut - 1;
+    for (; MaskElt < End; MaskElt++)
+      Mask[MaskElt] = ZeroVecElt++;
+    Mask[MaskElt] = PackedElt;
+  }
+  SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
+  return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
+}
+
  SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
                                            unsigned ByScalar) const {
    // Look for cases where a vector shift can use the *_BY_SCALAR form.
@@ -5296,9 +5445,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
    case ISD::EXTRACT_VECTOR_ELT:
      return lowerEXTRACT_VECTOR_ELT(Op, DAG);
    case ISD::SIGN_EXTEND_VECTOR_INREG:
-    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
+    return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
    case ISD::ZERO_EXTEND_VECTOR_INREG:
-    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
+    return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
    case ISD::SHL:
      return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
    case ISD::SRL:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h

index e60deaedbdfb6debd3e704974adebb5e234117f0..27637762296a425066034a2c4cc32f520559745b 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -627,8 +627,8 @@ private:
    SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
-                                 unsigned UnpackHigh) const;
+  SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
  
    bool canTreatAsByteVector(EVT VT) const;
diff --git a/llvm/test/CodeGen/SystemZ/vec-move-16.ll b/llvm/test/CodeGen/SystemZ/vec-move-16.ll

index cd25773968001d076c6fdeaa037a1c3caaf96a6f..cc8237bcfa29867a8b1b0503119823d848cb8c91 100644 (file)
--- a/llvm/test/CodeGen/SystemZ/vec-move-16.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-move-16.ll
@@ -40,9 +40,10 @@ define <4 x i32> @f4(<4 x i1> *%ptr) {
  ; Test a v4i8->v4i32 extension.
  define <4 x i32> @f5(<4 x i8> *%ptr) {
  ; CHECK-LABEL: f5:
+; CHECK: larl  %r1, .LCPI4_0
  ; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
-; CHECK: vuplhh %v24, [[REG2]]
+; CHECK: vl    %v1, 0(%r1), 3
+; CHECK: vperm %v24, %v1, [[REG1]], %v1
  ; CHECK: br %r14
    %val = load <4 x i8>, <4 x i8> *%ptr
    %ret = zext <4 x i8> %val to <4 x i32>
@@ -71,10 +72,10 @@ define <2 x i64> @f7(<2 x i1> *%ptr) {
  ; Test a v2i8->v2i64 extension.
  define <2 x i64> @f8(<2 x i8> *%ptr) {
  ; CHECK-LABEL: f8:
-; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
-; CHECK: vuplhh [[REG3:%v[0-9]+]], [[REG2]]
-; CHECK: vuplhf %v24, [[REG3]]
+; CHECK: larl  %r1, .LCPI7_0
+; CHECK: vlreph        [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vl    %v1, 0(%r1), 3
+; CHECK: vperm %v24, %v1, [[REG1]], %v1
  ; CHECK: br %r14
    %val = load <2 x i8>, <2 x i8> *%ptr
    %ret = zext <2 x i8> %val to <2 x i64>
@@ -84,9 +85,10 @@ define <2 x i64> @f8(<2 x i8> *%ptr) {
  ; Test a v2i16->v2i64 extension.
  define <2 x i64> @f9(<2 x i16> *%ptr) {
  ; CHECK-LABEL: f9:
-; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK: vuplhh [[REG2:%v[0-9]+]], [[REG1]]
-; CHECK: vuplhf %v24, [[REG2]]
+; CHECK: larl  %r1, .LCPI8_0
+; CHECK: vlrepf        [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vl    %v1, 0(%r1), 3
+; CHECK: vperm %v24, %v1, [[REG1]], %v1
  ; CHECK: br %r14
    %val = load <2 x i16>, <2 x i16> *%ptr
    %ret = zext <2 x i16> %val to <2 x i64>
diff --git a/llvm/test/CodeGen/SystemZ/vec-move-23.ll b/llvm/test/CodeGen/SystemZ/vec-move-23.ll

index 430e879bcc06924ab045a101edd9181da3216154..1e31c0b606ee07f92217332cd485d9e5d5d6ba53 100644 (file)
--- a/llvm/test/CodeGen/SystemZ/vec-move-23.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-move-23.ll
@@ -68,9 +68,9 @@ define void @fun3(<4 x i16> %Src, <4 x float>* %Dst) {
  
  define void @fun4(<2 x i8> %Src, <2 x double>* %Dst) {
  ; CHECK-LABEL: fun4:
-; CHECK:      vuplhb   %v0, %v24
-; CHECK-NEXT: vuplhh   %v0, %v0
-; CHECK-NEXT: vuplhf   %v0, %v0
+; CHECK:      larl     %r1, .LCPI4_0
+; CHECK-NEXT: vl       %v0, 0(%r1), 3
+; CHECK-NEXT: vperm    %v0, %v0, %v24, %v0
  ; CHECK-NEXT: vcdlgb   %v0, %v0, 0, 0
  ; CHECK-NEXT: vst      %v0, 0(%r2), 3
  ; CHECK-NEXT: br       %r14
@@ -81,8 +81,9 @@ define void @fun4(<2 x i8> %Src, <2 x double>* %Dst) {
  
  define void @fun5(<2 x i16> %Src, <2 x double>* %Dst) {
  ; CHECK-LABEL: fun5:
-; CHECK:      vuplhh   %v0, %v24
-; CHECK-NEXT: vuplhf   %v0, %v0
+; CHECK:      larl     %r1, .LCPI5_0
+; CHECK-NEXT: vl       %v0, 0(%r1), 3
+; CHECK-NEXT: vperm    %v0, %v0, %v24, %v0
  ; CHECK-NEXT: vcdlgb   %v0, %v0, 0, 0
  ; CHECK-NEXT: vst      %v0, 0(%r2), 3
  ; CHECK-NEXT: br       %r14
diff --git a/llvm/test/CodeGen/SystemZ/vec-move-24.ll b/llvm/test/CodeGen/SystemZ/vec-move-24.ll

new file mode 100644 (file)

index 0000000..4f95bb1
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-move-24.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+;
+; Test that vperm is not used if a single unpack is enough.
+
+define <4 x i32> @fun0(<4 x i32>* %Src) nounwind {
+; CHECK-LABEL: fun0:
+; CHECK-NOT: vperm
+  %tmp = load <4 x i32>, <4 x i32>* %Src
+  %tmp2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp, <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+  ret <4 x i32> %tmp2
+}
+
+define  void @fun1(i8 %Src, <32 x i8>* %Dst) nounwind {
+; CHECK-LABEL: fun1:
+; CHECK-NOT: vperm
+  %I0 = insertelement <16 x i8> undef, i8 %Src, i32 0
+  %I1 = insertelement <16 x i8> %I0, i8 %Src, i32 1
+  %I2 = insertelement <16 x i8> %I1, i8 %Src, i32 2
+  %I3 = insertelement <16 x i8> %I2, i8 %Src, i32 3
+  %I4 = insertelement <16 x i8> %I3, i8 %Src, i32 4
+  %I5 = insertelement <16 x i8> %I4, i8 %Src, i32 5
+  %I6 = insertelement <16 x i8> %I5, i8 %Src, i32 6
+  %I7 = insertelement <16 x i8> %I6, i8 %Src, i32 7
+  %I8 = insertelement <16 x i8> %I7, i8 %Src, i32 8
+  %I9 = insertelement <16 x i8> %I8, i8 %Src, i32 9
+  %I10 = insertelement <16 x i8> %I9, i8 %Src, i32 10
+  %I11 = insertelement <16 x i8> %I10, i8 %Src, i32 11
+  %I12 = insertelement <16 x i8> %I11, i8 %Src, i32 12
+  %I13 = insertelement <16 x i8> %I12, i8 %Src, i32 13
+  %I14 = insertelement <16 x i8> %I13, i8 %Src, i32 14
+  %I15 = insertelement <16 x i8> %I14, i8 %Src, i32 15
+
+  %tmp = shufflevector <16 x i8> zeroinitializer,
+                       <16 x i8> %I15,
+                       <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                                   i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+                                   i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+                                   i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %tmp9 = shufflevector <32 x i8> undef,
+                        <32 x i8> %tmp,
+                        <32 x i32> <i32 33, i32 32, i32 48, i32 49, i32 1, i32 17, i32 50, i32 51,
+                                    i32 2, i32 18, i32 52, i32 53, i32 3, i32 19, i32 54, i32 55,
+                                    i32 4, i32 20, i32 56, i32 57, i32 5, i32 21, i32 58, i32 59,
+                                    i32 6, i32 22, i32 60, i32 61, i32 7, i32 62, i32 55, i32 63>
+
+  store <32 x i8> %tmp9, <32 x i8>* %Dst
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-zext.ll b/llvm/test/CodeGen/SystemZ/vec-zext.ll

index 831594d4020c4b8ba0c619c9076af8748d91691b..b4c8f2307b0b7aeeaa1cab0de9113c73b56d2934 100644 (file)
--- a/llvm/test/CodeGen/SystemZ/vec-zext.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-zext.ll
@@ -1,5 +1,5 @@
-; Test that vector zexts are done efficently with unpack instructions also in
-; case of fewer elements than allowed, e.g. <2 x i32>.
+; Test that vector zexts are done efficiently also in case of fewer elements
+; than allowed, e.g. <2 x i32>.
  ;
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
  
@@ -14,8 +14,9 @@ define <2 x i16> @fun1(<2 x i8> %val1) {
  
  define <2 x i32> @fun2(<2 x i8> %val1) {
  ; CHECK-LABEL: fun2:
-; CHECK:       vuplhb  %v0, %v24
-; CHECK-NEXT:  vuplhh  %v24, %v0
+; CHECK:        larl   %r1, .LCPI1_0
+; CHECK-NEXT:   vl     %v0, 0(%r1), 3
+; CHECK-NEXT:   vperm  %v24, %v0, %v24, %v0
  ; CHECK-NEXT:  br      %r14
    %z = zext <2 x i8> %val1 to <2 x i32>
    ret <2 x i32> %z
@@ -23,9 +24,9 @@ define <2 x i32> @fun2(<2 x i8> %val1) {
  
  define <2 x i64> @fun3(<2 x i8> %val1) {
  ; CHECK-LABEL: fun3:
-; CHECK:       vuplhb  %v0, %v24
-; CHECK-NEXT:  vuplhh  %v0, %v0
-; CHECK-NEXT:  vuplhf  %v24, %v0
+; CHECK:       larl    %r1, .LCPI2_0
+; CHECK-NEXT:  vl      %v0, 0(%r1), 3
+; CHECK-NEXT:  vperm   %v24, %v0, %v24, %v0
  ; CHECK-NEXT:  br      %r14
    %z = zext <2 x i8> %val1 to <2 x i64>
    ret <2 x i64> %z
@@ -41,8 +42,9 @@ define <2 x i32> @fun4(<2 x i16> %val1) {
  
  define <2 x i64> @fun5(<2 x i16> %val1) {
  ; CHECK-LABEL: fun5:
-; CHECK:       vuplhh  %v0, %v24
-; CHECK-NEXT:  vuplhf  %v24, %v0
+; CHECK:       larl    %r1, .LCPI4_0
+; CHECK-NEXT:  vl      %v0, 0(%r1), 3
+; CHECK-NEXT:  vperm   %v24, %v0, %v24, %v0
  ; CHECK-NEXT:  br      %r14
    %z = zext <2 x i16> %val1 to <2 x i64>
    ret <2 x i64> %z
@@ -66,8 +68,9 @@ define <4 x i16> @fun7(<4 x i8> %val1) {
  
  define <4 x i32> @fun8(<4 x i8> %val1) {
  ; CHECK-LABEL: fun8:
-; CHECK:       vuplhb  %v0, %v24
-; CHECK-NEXT:  vuplhh  %v24, %v0
+; CHECK:       larl    %r1, .LCPI7_0
+; CHECK-NEXT:  vl      %v0, 0(%r1), 3
+; CHECK-NEXT:  vperm   %v24, %v0, %v24, %v0
  ; CHECK-NEXT:  br      %r14
    %z = zext <4 x i8> %val1 to <4 x i32>
    ret <4 x i32> %z
author	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Thu, 26 Mar 2020 11:22:14 +0000 (12:22 +0100)
committer	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Tue, 30 Jun 2020 07:08:10 +0000 (09:08 +0200)
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/SystemZ/SystemZISelLowering.h		patch \| blob \| history
llvm/test/CodeGen/SystemZ/vec-move-16.ll		patch \| blob \| history
llvm/test/CodeGen/SystemZ/vec-move-23.ll		patch \| blob \| history
llvm/test/CodeGen/SystemZ/vec-move-24.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/SystemZ/vec-zext.ll		patch \| blob \| history