[DAGCombiner] transform sub-of-shifted-signbit to add

author Sanjay Patel <spatel@rotateright.com>

Mon, 30 Jul 2018 22:21:37 +0000 (22:21 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Mon, 30 Jul 2018 22:21:37 +0000 (22:21 +0000)
author Sanjay Patel <spatel@rotateright.com>
Mon, 30 Jul 2018 22:21:37 +0000 (22:21 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Mon, 30 Jul 2018 22:21:37 +0000 (22:21 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 6c6b225..5ce5bab 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2743,6 +2743,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
      }
    }
  
+  // Prefer an add for more folding potential and possibly better codegen:
+  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
+  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
+    SDValue ShAmt = N1.getOperand(1);
+    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+    if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
+      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
+      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
+    }
+  }
+
    return SDValue();
  }
  
diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll

index b554ce1..250290a 100644 (file)
--- a/llvm/test/CodeGen/AArch64/signbit-shift.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll
@@ -150,8 +150,8 @@ define i32 @sext_ifneg(i32 %x) {
  define i32 @add_sext_ifneg(i32 %x) {
  ; CHECK-LABEL: add_sext_ifneg:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42
-; CHECK-NEXT:    sub w0, w8, w0, lsr #31
+; CHECK-NEXT:    asr w8, w0, #31
+; CHECK-NEXT:    add w0, w8, #42 // =42
  ; CHECK-NEXT:    ret
    %c = icmp slt i32 %x, 0
    %e = sext i1 %c to i32
@@ -225,7 +225,7 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
  define i32 @sub_lshr(i32 %x, i32 %y) {
  ; CHECK-LABEL: sub_lshr:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w0, w1, w0, lsr #31
+; CHECK-NEXT:    add w0, w1, w0, asr #31
  ; CHECK-NEXT:    ret
    %sh = lshr i32 %x, 31
    %r = sub i32 %y, %sh
@@ -235,8 +235,8 @@ define i32 @sub_lshr(i32 %x, i32 %y) {
  define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
  ; CHECK-LABEL: sub_lshr_vec:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #31
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ssra v1.4s, v0.4s, #31
+; CHECK-NEXT:    mov v0.16b, v1.16b
  ; CHECK-NEXT:    ret
    %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
    %r = sub <4 x i32> %y, %sh
@@ -246,8 +246,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
  define i32 @sub_const_op_lshr(i32 %x) {
  ; CHECK-LABEL: sub_const_op_lshr:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #43
-; CHECK-NEXT:    sub w0, w8, w0, lsr #31
+; CHECK-NEXT:    asr w8, w0, #31
+; CHECK-NEXT:    add w0, w8, #43 // =43
  ; CHECK-NEXT:    ret
    %sh = lshr i32 %x, 31
    %r = sub i32 43, %sh
@@ -257,9 +257,9 @@ define i32 @sub_const_op_lshr(i32 %x) {
  define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
  ; CHECK-LABEL: sub_const_op_lshr_vec:
  ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #31
  ; CHECK-NEXT:    movi v1.4s, #42
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ssra v1.4s, v0.4s, #31
+; CHECK-NEXT:    mov v0.16b, v1.16b
  ; CHECK-NEXT:    ret
    %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
    %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh
diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll

index 7587587..41d250e 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
@@ -243,8 +243,8 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
  define i32 @sub_lshr(i32 %x, i32 %y) {
  ; CHECK-LABEL: sub_lshr:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    srwi 3, 3, 31
-; CHECK-NEXT:    subf 3, 3, 4
+; CHECK-NEXT:    srawi 3, 3, 31
+; CHECK-NEXT:    add 3, 4, 3
  ; CHECK-NEXT:    blr
    %sh = lshr i32 %x, 31
    %r = sub i32 %y, %sh
@@ -257,8 +257,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
  ; CHECK-NEXT:    vspltisw 4, -16
  ; CHECK-NEXT:    vspltisw 5, 15
  ; CHECK-NEXT:    vsubuwm 4, 5, 4
-; CHECK-NEXT:    vsrw 2, 2, 4
-; CHECK-NEXT:    vsubuwm 2, 3, 2
+; CHECK-NEXT:    vsraw 2, 2, 4
+; CHECK-NEXT:    vadduwm 2, 3, 2
  ; CHECK-NEXT:    blr
    %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
    %r = sub <4 x i32> %y, %sh
@@ -268,8 +268,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
  define i32 @sub_const_op_lshr(i32 %x) {
  ; CHECK-LABEL: sub_const_op_lshr:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    srwi 3, 3, 31
-; CHECK-NEXT:    subfic 3, 3, 43
+; CHECK-NEXT:    srawi 3, 3, 31
+; CHECK-NEXT:    addi 3, 3, 43
  ; CHECK-NEXT:    blr
    %sh = lshr i32 %x, 31
    %r = sub i32 43, %sh
@@ -284,9 +284,9 @@ define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
  ; CHECK-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
  ; CHECK-NEXT:    addi 3, 3, .LCPI21_0@toc@l
  ; CHECK-NEXT:    vsubuwm 3, 4, 3
-; CHECK-NEXT:    vsrw 2, 2, 3
+; CHECK-NEXT:    vsraw 2, 2, 3
  ; CHECK-NEXT:    lvx 3, 0, 3
-; CHECK-NEXT:    vsubuwm 2, 3, 2
+; CHECK-NEXT:    vadduwm 2, 2, 3
  ; CHECK-NEXT:    blr
    %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
    %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh
diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll

index cee6479..1579a77 100644 (file)
--- a/llvm/test/CodeGen/X86/signbit-shift.ll
+++ b/llvm/test/CodeGen/X86/signbit-shift.ll
@@ -156,9 +156,9 @@ define i32 @sext_ifneg(i32 %x) {
  define i32 @add_sext_ifneg(i32 %x) {
  ; CHECK-LABEL: add_sext_ifneg:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $31, %edi
-; CHECK-NEXT:    movl $42, %eax
-; CHECK-NEXT:    subl %edi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    sarl $31, %edi
+; CHECK-NEXT:    leal 42(%rdi), %eax
  ; CHECK-NEXT:    retq
    %c = icmp slt i32 %x, 0
    %e = sext i1 %c to i32
@@ -169,9 +169,9 @@ define i32 @add_sext_ifneg(i32 %x) {
  define i32 @sel_ifneg_fval_bigger(i32 %x) {
  ; CHECK-LABEL: sel_ifneg_fval_bigger:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $31, %edi
-; CHECK-NEXT:    movl $42, %eax
-; CHECK-NEXT:    subl %edi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    sarl $31, %edi
+; CHECK-NEXT:    leal 42(%rdi), %eax
  ; CHECK-NEXT:    retq
    %c = icmp slt i32 %x, 0
    %r = select i1 %c, i32 41, i32 42
@@ -231,9 +231,10 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
  define i32 @sub_lshr(i32 %x, i32 %y) {
  ; CHECK-LABEL: sub_lshr:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $31, %edi
-; CHECK-NEXT:    subl %edi, %esi
-; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    sarl $31, %edi
+; CHECK-NEXT:    leal (%rdi,%rsi), %eax
  ; CHECK-NEXT:    retq
    %sh = lshr i32 %x, 31
    %r = sub i32 %y, %sh
@@ -243,9 +244,8 @@ define i32 @sub_lshr(i32 %x, i32 %y) {
  define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
  ; CHECK-LABEL: sub_lshr_vec:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    psrld $31, %xmm0
-; CHECK-NEXT:    psubd %xmm0, %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
+; CHECK-NEXT:    paddd %xmm1, %xmm0
  ; CHECK-NEXT:    retq
    %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
    %r = sub <4 x i32> %y, %sh
@@ -255,9 +255,9 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
  define i32 @sub_const_op_lshr(i32 %x) {
  ; CHECK-LABEL: sub_const_op_lshr:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $31, %edi
-; CHECK-NEXT:    xorl $43, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    sarl $31, %edi
+; CHECK-NEXT:    leal 43(%rdi), %eax
  ; CHECK-NEXT:    retq
    %sh = lshr i32 %x, 31
    %r = sub i32 43, %sh
@@ -267,10 +267,8 @@ define i32 @sub_const_op_lshr(i32 %x) {
  define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
  ; CHECK-LABEL: sub_const_op_lshr_vec:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    psrld $31, %xmm0
-; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42]
-; CHECK-NEXT:    psubd %xmm0, %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
+; CHECK-NEXT:    paddd {{.*}}(%rip), %xmm0
  ; CHECK-NEXT:    retq
    %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
    %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh
author	Sanjay Patel <spatel@rotateright.com>
	Mon, 30 Jul 2018 22:21:37 +0000 (22:21 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Mon, 30 Jul 2018 22:21:37 +0000 (22:21 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/signbit-shift.ll		patch \| blob \| history
llvm/test/CodeGen/PowerPC/signbit-shift.ll		patch \| blob \| history
llvm/test/CodeGen/X86/signbit-shift.ll		patch \| blob \| history