[X86][AVX512CDI] Move v2i64/v4i64 and v4i32/v8i32 VPLZCNT lowering to tablegen

author Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 5 May 2017 12:20:34 +0000 (12:20 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 5 May 2017 12:20:34 +0000 (12:20 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 5 May 2017 12:20:34 +0000 (12:20 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 5 May 2017 12:20:34 +0000 (12:20 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index fb9c177..5dd2e52 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1352,8 +1352,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
      setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
  
      if (Subtarget.hasCDI()) {
+      // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
        setOperationAction(ISD::CTLZ,             MVT::v8i64,  Legal);
        setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
+      setOperationAction(ISD::CTLZ,             MVT::v4i64,  Legal);
+      setOperationAction(ISD::CTLZ,             MVT::v8i32,  Legal);
+      setOperationAction(ISD::CTLZ,             MVT::v2i64,  Legal);
+      setOperationAction(ISD::CTLZ,             MVT::v4i32,  Legal);
  
        setOperationAction(ISD::CTLZ,             MVT::v8i16,  Custom);
        setOperationAction(ISD::CTLZ,             MVT::v16i8,  Custom);
@@ -1362,23 +1367,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  
        setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v8i64,  Custom);
        setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v16i32, Custom);
-
-      if (Subtarget.hasVLX()) {
-        setOperationAction(ISD::CTLZ,             MVT::v4i64, Legal);
-        setOperationAction(ISD::CTLZ,             MVT::v8i32, Legal);
-        setOperationAction(ISD::CTLZ,             MVT::v2i64, Legal);
-        setOperationAction(ISD::CTLZ,             MVT::v4i32, Legal);
-      } else {
-        setOperationAction(ISD::CTLZ,             MVT::v4i64, Custom);
-        setOperationAction(ISD::CTLZ,             MVT::v8i32, Custom);
-        setOperationAction(ISD::CTLZ,             MVT::v2i64, Custom);
-        setOperationAction(ISD::CTLZ,             MVT::v4i32, Custom);
-      }
-
-      setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v4i64, Custom);
-      setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v8i32, Custom);
-      setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v2i64, Custom);
-      setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v4i32, Custom);
+      setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v4i64,  Custom);
+      setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v8i32,  Custom);
+      setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v2i64,  Custom);
+      setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v4i32,  Custom);
      } // Subtarget.hasCDI()
  
      if (Subtarget.hasDQI()) {
@@ -20981,12 +20973,10 @@ static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) {
  
  /// \brief Lower a vector CTLZ using native supported vector CTLZ instruction.
  //
-// 1. i32/i64 128/256-bit vector (native support require VLX) are expended
-//    to 512-bit vector.
-// 2. i8/i16 vector implemented using dword LZCNT vector instruction
-//    ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
-//    split the vector, perform operation on it's Lo a Hi part and
-//    concatenate the results.
+// i8/i16 vectors are implemented using the dword LZCNT vector instruction
+// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
+// split the vector, perform the operation on its Lo and Hi parts and
+// concatenate the results.
  static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) {
    assert(Op.getOpcode() == ISD::CTLZ);
    SDLoc dl(Op);
@@ -20994,22 +20984,6 @@ static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) {
    MVT EltVT = VT.getVectorElementType();
    unsigned NumElems = VT.getVectorNumElements();
  
-  if (EltVT == MVT::i64 || EltVT == MVT::i32) {
-    // Extend to 512 bit vector.
-    assert((VT.is256BitVector() || VT.is128BitVector()) &&
-              "Unsupported value type for operation");
-
-    MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits());
-    SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
-                                 DAG.getUNDEF(NewVT),
-                                 Op.getOperand(0),
-                                 DAG.getIntPtrConstant(0, dl));
-    SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512);
-
-    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode,
-                       DAG.getIntPtrConstant(0, dl));
-  }
-
    assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
            "Unsupported element type");
  
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td

index c38c13b..91eff70 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8639,6 +8639,31 @@ multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
  defm VPLZCNT    : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
  defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
  
+// VPLZCNT: Use the 512-bit version to implement 128/256-bit ops if NoVLX.
+let Predicates = [HasCDI, NoVLX] in {
+  def : Pat<(v4i64 (ctlz VR256X:$src)),
+            (EXTRACT_SUBREG
+                (VPLZCNTQZrr
+                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
+             sub_ymm)>;
+  def : Pat<(v2i64 (ctlz VR128X:$src)),
+            (EXTRACT_SUBREG
+                (VPLZCNTQZrr
+                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
+             sub_xmm)>;
+
+  def : Pat<(v8i32 (ctlz VR256X:$src)),
+            (EXTRACT_SUBREG
+                (VPLZCNTDZrr
+                    (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
+             sub_ymm)>;
+  def : Pat<(v4i32 (ctlz VR128X:$src)),
+            (EXTRACT_SUBREG
+                (VPLZCNTDZrr
+                    (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
+             sub_xmm)>;
+}
+
  //===---------------------------------------------------------------------===//
  // Replicate Single FP - MOVSHDUP and MOVSLDUP
  //===---------------------------------------------------------------------===//
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 5 May 2017 12:20:34 +0000 (12:20 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 5 May 2017 12:20:34 +0000 (12:20 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86InstrAVX512.td		patch \| blob \| history