SelectionDAG: Teach the legalizer to split SETCC if VSELECT needs splitting too.

author Juergen Ributzka <juergen@apple.com>

Wed, 30 Oct 2013 05:48:18 +0000 (05:48 +0000)

committer Juergen Ributzka <juergen@apple.com>

Wed, 30 Oct 2013 05:48:18 +0000 (05:48 +0000)
author Juergen Ributzka <juergen@apple.com>
Wed, 30 Oct 2013 05:48:18 +0000 (05:48 +0000)
committer Juergen Ributzka <juergen@apple.com>
Wed, 30 Oct 2013 05:48:18 +0000 (05:48 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index de0f6ce..8237ef3 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4346,6 +4346,28 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
      }
    }
  
+  // Treat SETCC as a vector mask and promote the result type based on the
+  // target's expected SETCC result type. This will ensure that SETCC and
+  // VSELECT are both split by the type legalizer. This is done to prevent the
+  // type legalizer from unrolling SETCC into scalar comparisons.
+  EVT SelectVT = N->getValueType(0);
+  EVT MaskVT = getSetCCResultType(SelectVT);
+  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() != MaskVT) {
+    SDLoc MaskDL(N0);
+
+    // Extend the mask to the desired value type.
+    ISD::NodeType ExtendCode =
+      TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+    SDValue Mask = DAG.getNode(ExtendCode, MaskDL, MaskVT, N0);
+
+    AddToWorkList(Mask.getNode());
+
+    SDValue LHS = N->getOperand(1);
+    SDValue RHS = N->getOperand(2);
+
+    return DAG.getNode(ISD::VSELECT, DL, SelectVT, Mask, LHS, RHS);
+  }
+
    return SDValue();
  }
  
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp

index 7b1d14d..f1b06fc 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -492,14 +492,19 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
    SDValue Cond = N->getOperand(0);
    CL = CH = Cond;
    if (Cond.getValueType().isVector()) {
-    assert(Cond.getValueType().getVectorElementType() == MVT::i1 &&
-           "Condition legalized before result?");
-    unsigned NumElements = Cond.getValueType().getVectorNumElements();
-    EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2);
-    CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
-                     DAG.getConstant(0, TLI.getVectorIdxTy()));
-    CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
-                     DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy()));
+    if (Cond.getOpcode() == ISD::SETCC) {
+      assert(Cond.getValueType() == getSetCCResultType(N->getValueType(0)) &&
+             "Condition has not been prepared for split!");
+      GetSplitVector(Cond, CL, CH);
+    } else {
+      EVT ETy = Cond.getValueType().getVectorElementType();
+      unsigned NumElements = Cond.getValueType().getVectorNumElements();
+      EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), ETy, NumElements / 2);
+      CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+                       DAG.getConstant(0, TLI.getVectorIdxTy()));
+      CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond,
+                       DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy()));
+    }
    }
  
    Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 5dbef0f..18064fc 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1546,7 +1546,16 @@ void X86TargetLowering::resetOperationActions() {
  }
  
  EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
-  if (!VT.isVector()) return MVT::i8;
+  if (!VT.isVector())
+    return MVT::i8;
+
+  const TargetMachine &TM = getTargetMachine();
+  if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512())
+    switch(VT.getVectorNumElements()) {
+    case  8: return MVT::v8i1;
+    case 16: return MVT::v16i1;
+    }
+
    return VT.changeVectorElementTypeToInteger();
  }
  
diff --git a/llvm/test/CodeGen/X86/vec_split.ll b/llvm/test/CodeGen/X86/vec_split.ll

new file mode 100644 (file)

index 0000000..f9e7c20
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec_split.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
+; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) {
+; SSE4-LABEL: split16:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: ret
+; AVX1-LABEL: split16:
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: ret
+; AVX2-LABEL: split16:
+; AVX2: vpminuw
+; AVX2: ret
+  %1 = icmp ult <16 x i16> %a, %b
+  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+  ret <16 x i16> %2
+}
+
+define <32 x i16> @split32(<32 x i16> %a, <32 x i16> %b, <32 x i8> %__mask) {
+; SSE4-LABEL: split32:
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: pminuw
+; SSE4: ret
+; AVX1-LABEL: split32:
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: vpminuw
+; AVX1: ret
+; AVX2-LABEL: split32:
+; AVX2: vpminuw
+; AVX2: vpminuw
+; AVX2: ret
+  %1 = icmp ult <32 x i16> %a, %b
+  %2 = select <32 x i1> %1, <32 x i16> %a, <32 x i16> %b
+  ret <32 x i16> %2
+}
author	Juergen Ributzka <juergen@apple.com>
	Wed, 30 Oct 2013 05:48:18 +0000 (05:48 +0000)
committer	Juergen Ributzka <juergen@apple.com>
	Wed, 30 Oct 2013 05:48:18 +0000 (05:48 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/vec_split.ll	[new file with mode: 0644]	patch \| blob