[DAG] Legalize umin(x,y) -> sub(x,usubsat(x,y)) and umax(x,y) -> add(x,usubsat(y,x)) iff usubsat is legal

author Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 27 Nov 2020 11:00:34 +0000 (11:00 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 27 Nov 2020 11:18:58 +0000 (11:18 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 27 Nov 2020 11:00:34 +0000 (11:00 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 27 Nov 2020 11:18:58 +0000 (11:18 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 3464fe87d99fc8896652c0d5071add82fe0016da..e45a311f84a480517eba432db803a6bb373cb107 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7478,10 +7478,26 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
    SDValue Op0 = Node->getOperand(0);
    SDValue Op1 = Node->getOperand(1);
    EVT VT = Op0.getValueType();
+  unsigned Opcode = Node->getOpcode();
+  SDLoc DL(Node);
+
+  // umin(x,y) -> sub(x,usubsat(x,y))
+  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
+      isOperationLegal(ISD::USUBSAT, VT)) {
+    return DAG.getNode(ISD::SUB, DL, VT, Op0,
+                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
+  }
+
+  // umax(x,y) -> add(x,usubsat(y,x))
+  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
+      isOperationLegal(ISD::USUBSAT, VT)) {
+    return DAG.getNode(ISD::ADD, DL, VT, Op0,
+                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
+  }
  
    // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
    ISD::CondCode CC;
-  switch (Node->getOpcode()) {
+  switch (Opcode) {
    default: llvm_unreachable("How did we get here?");
    case ISD::SMAX: CC = ISD::SETGT; break;
    case ISD::SMIN: CC = ISD::SETLT; break;
@@ -7494,7 +7510,6 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
    if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(Node);
  
-  SDLoc DL(Node);
    SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
    return DAG.getSelect(DL, VT, Cond, Op0, Op1);
  }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

index 54bb3d0c7781bbab6a7f513f1343e0f4bc7455e6..37a34023b8d01fa91a240ced90182789d02d2040 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -217,10 +217,17 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) {
    auto *RetTy = ICA.getReturnType();
    switch (ICA.getID()) {
-  case Intrinsic::smin:
    case Intrinsic::umin:
-  case Intrinsic::smax:
    case Intrinsic::umax: {
+    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    // umin(x,y) -> sub(x,usubsat(x,y))
+    // umax(x,y) -> add(x,usubsat(y,x))
+    if (LT.second == MVT::v2i64)
+      return LT.first * 2;
+    LLVM_FALLTHROUGH;
+  }
+  case Intrinsic::smin:
+  case Intrinsic::smax: {
      static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                          MVT::v8i16, MVT::v2i32, MVT::v4i32};
      auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index fcbe1330b54609b7d37fadaf9dc0d469220809fa..5cbca95f45f52a448ff090fdb429dbaa8585371b 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26959,22 +26959,6 @@ static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
    if (VT == MVT::v32i16 || VT == MVT::v64i8)
      return splitVectorIntBinary(Op, DAG);
  
-  SDLoc DL(Op);
-  unsigned Opcode = Op.getOpcode();
-  SDValue N0 = Op.getOperand(0);
-  SDValue N1 = Op.getOperand(1);
-
-  // For pre-SSE41, we can perform UMIN/UMAX v8i16 by using psubusw.
-  if (VT == MVT::v8i16) {
-    assert((Opcode == ISD::UMIN || Opcode == ISD::UMAX) &&
-           "Unexpected MIN/MAX opcode");
-    if (Opcode == ISD::UMIN)
-      return DAG.getNode(ISD::SUB, DL, VT, N0,
-                         DAG.getNode(ISD::USUBSAT, DL, VT, N0, N1));
-    return DAG.getNode(ISD::ADD, DL, VT,
-                       DAG.getNode(ISD::USUBSAT, DL, VT, N1, N0), N0);
-  }
-
    // Default to expand.
    return SDValue();
  }
diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll

index f47fc1c49ef5553945d3986f73b38328f6bb9a89..18bac082a66ac1e7e6aaec637b410f3e66bffe7b 100644 (file)
--- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
@@ -115,12 +115,13 @@ define <8 x i32> @umin.v8i32(<8 x i32> %v0, <8 x i32> %v1) {
  }
  
  ; COST-LABEL: umin.v2i64
-; COST-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %v0, <2 x i64> %v1)
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %v0, <2 x i64> %v1)
  
  ; CODE-LABEL: umin.v2i64
  ; CODE:       bb.0
-; CODE:        csel
-; CODE:        csel
+; CODE-NEXT:   uqsub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d
+; CODE-NEXT:   sub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d
+; CODE-NEXT:   ret
  
  declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
  define <2 x i64> @umin.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
@@ -369,12 +370,13 @@ define <8 x i32> @umax.v8i32(<8 x i32> %v0, <8 x i32> %v1) {
  }
  
  ; COST-LABEL: umax.v2i64
-; COST-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %v0, <2 x i64> %v1)
+; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %v0, <2 x i64> %v1)
  
  ; CODE-LABEL: umax.v2i64
  ; CODE:       bb.0
-; CODE:        csel
-; CODE:        csel
+; CODE-NEXT:   uqsub v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d
+; CODE-NEXT:   add v{{.*}}.2d, v{{.*}}.2d, v{{.*}}.2d
+; CODE-NEXT:   ret
  
  declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
  define <2 x i64> @umax.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll

index c12d90fee09e9fac60dd0aa90fb5eaab6dfbd4d7..8ef3f307d0b58a907416e0b437e9e50a17f72c4a 100644 (file)
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -2179,8 +2179,8 @@ define <8 x i16> @vec128_i16_unsigned_reg_reg(<8 x i16> %a1, <8 x i16> %a2) noun
  ; SSE2-NEXT:    psubusw %xmm1, %xmm2
  ; SSE2-NEXT:    psubusw %xmm0, %xmm1
  ; SSE2-NEXT:    psubw %xmm0, %xmm2
-; SSE2-NEXT:    paddw %xmm0, %xmm2
  ; SSE2-NEXT:    paddw %xmm1, %xmm2
+; SSE2-NEXT:    paddw %xmm0, %xmm2
  ; SSE2-NEXT:    psrlw $1, %xmm2
  ; SSE2-NEXT:    pmullw %xmm3, %xmm2
  ; SSE2-NEXT:    paddw %xmm0, %xmm2
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 27 Nov 2020 11:00:34 +0000 (11:00 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 27 Nov 2020 11:18:58 +0000 (11:18 +0000)
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/Analysis/CostModel/AArch64/min-max.ll		patch \| blob \| history
llvm/test/CodeGen/X86/midpoint-int-vec-128.ll		patch \| blob \| history