[DAGCombiner] use UADDO to optimize saturated unsigned add

author Sanjay Patel <spatel@rotateright.com>
Mon, 24 Sep 2018 14:47:15 +0000 (14:47 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Mon, 24 Sep 2018 14:47:15 +0000 (14:47 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index fd7d591..5c7c029 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7346,6 +7346,35 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
                                                  CC, TLI, DAG))
          return FMinMax;
  
+    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
+    // This is conservatively limited to pre-legal-operations to give targets
+    // a chance to reverse the transform if they want to do that. Also, it is
+    // unlikely that the pattern would be formed late, so it's probably not
+    // worth going through the other checks.
+    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
+        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
+        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
+      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
+      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
+      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
+        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
+        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
+        //
+        // The IR equivalent of this transform would have this form:
+        //   %a = add %x, C
+        //   %c = icmp ugt %x, ~C
+        //   %r = select %c, -1, %a
+        //   =>
+        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
+        //   %u0 = extractvalue %u, 0
+        //   %u1 = extractvalue %u, 1
+        //   %r = select %u1, -1, %u0
+        SDVTList VTs = DAG.getVTList(VT, VT0);
+        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
+        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
+      }
+    }
+
      if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
          (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
        return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll

index ad13274..d908285 100644 (file)
--- a/llvm/test/CodeGen/AArch64/sat-add.ll
+++ b/llvm/test/CodeGen/AArch64/sat-add.ll
@@ -123,9 +123,8 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
  define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
  ; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, #42 // =42
-; CHECK-NEXT:    cmn w0, #43 // =43
-; CHECK-NEXT:    csinv w0, w8, wzr, ls
+; CHECK-NEXT:    adds w8, w0, #42 // =42
+; CHECK-NEXT:    csinv w0, w8, wzr, lo
  ; CHECK-NEXT:    ret
    %a = add i32 %x, 42
    %c = icmp ugt i32 %x, -43
@@ -162,9 +161,8 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
  define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
  ; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #42 // =42
-; CHECK-NEXT:    cmn x0, #43 // =43
-; CHECK-NEXT:    csinv x0, x8, xzr, ls
+; CHECK-NEXT:    adds x8, x0, #42 // =42
+; CHECK-NEXT:    csinv x0, x8, xzr, lo
  ; CHECK-NEXT:    ret
    %a = add i64 %x, 42
    %c = icmp ugt i64 %x, -43
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll

index 5b667d8..1f2b69f 100644 (file)
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -44,11 +44,10 @@ define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
  define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
  ; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
  ; ANY:       # %bb.0:
-; ANY-NEXT:    cmpb $-43, %dil
+; ANY-NEXT:    addb $42, %dil
  ; ANY-NEXT:    movb $-1, %al
-; ANY-NEXT:    ja .LBB2_2
+; ANY-NEXT:    jb .LBB2_2
  ; ANY-NEXT:  # %bb.1:
-; ANY-NEXT:    addb $42, %dil
  ; ANY-NEXT:    movl %edi, %eax
  ; ANY-NEXT:  .LBB2_2:
  ; ANY-NEXT:    retq
@@ -91,12 +90,9 @@ define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
  define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
  ; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
  ; ANY:       # %bb.0:
-; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
-; ANY-NEXT:    leal 42(%rdi), %ecx
-; ANY-NEXT:    movzwl %di, %eax
-; ANY-NEXT:    cmpl $65493, %eax # imm = 0xFFD5
+; ANY-NEXT:    addw $42, %di
  ; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; ANY-NEXT:    cmovbel %ecx, %eax
+; ANY-NEXT:    cmovael %edi, %eax
  ; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
  ; ANY-NEXT:    retq
    %a = add i16 %x, 42
@@ -135,11 +131,9 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
  define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
  ; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
  ; ANY:       # %bb.0:
-; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
-; ANY-NEXT:    leal 42(%rdi), %ecx
-; ANY-NEXT:    cmpl $-43, %edi
+; ANY-NEXT:    addl $42, %edi
  ; ANY-NEXT:    movl $-1, %eax
-; ANY-NEXT:    cmovbel %ecx, %eax
+; ANY-NEXT:    cmovael %edi, %eax
  ; ANY-NEXT:    retq
    %a = add i32 %x, 42
    %c = icmp ugt i32 %x, -43
@@ -177,10 +171,9 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
  define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
  ; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
  ; ANY:       # %bb.0:
-; ANY-NEXT:    cmpq $-43, %rdi
-; ANY-NEXT:    leaq 42(%rdi), %rax
-; ANY-NEXT:    movq $-1, %rcx
-; ANY-NEXT:    cmovaq %rcx, %rax
+; ANY-NEXT:    addq $42, %rdi
+; ANY-NEXT:    movq $-1, %rax
+; ANY-NEXT:    cmovaeq %rdi, %rax
  ; ANY-NEXT:    retq
    %a = add i64 %x, 42
    %c = icmp ugt i64 %x, -43
author	Sanjay Patel <spatel@rotateright.com>
	Mon, 24 Sep 2018 14:47:15 +0000 (14:47 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Mon, 24 Sep 2018 14:47:15 +0000 (14:47 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
llvm/test/CodeGen/AArch64/sat-add.ll		patch | blob | history
llvm/test/CodeGen/X86/sat-add.ll		patch | blob | history