}
}
+ // Prefer an add for more folding potential and possibly better codegen:
+ // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
+ if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
+ SDValue ShAmt = N1.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
+ }
+ }
+
return SDValue();
}
define i32 @add_sext_ifneg(i32 %x) {
; CHECK-LABEL: add_sext_ifneg:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #42
-; CHECK-NEXT: sub w0, w8, w0, lsr #31
+; CHECK-NEXT: asr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #42 // =42
; CHECK-NEXT: ret
%c = icmp slt i32 %x, 0
%e = sext i1 %c to i32
define i32 @sub_lshr(i32 %x, i32 %y) {
; CHECK-LABEL: sub_lshr:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w0, w1, w0, lsr #31
+; CHECK-NEXT: add w0, w1, w0, asr #31
; CHECK-NEXT: ret
%sh = lshr i32 %x, 31
%r = sub i32 %y, %sh
define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: sub_lshr_vec:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v0.4s, v0.4s, #31
-; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ssra v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> %y, %sh
define i32 @sub_const_op_lshr(i32 %x) {
; CHECK-LABEL: sub_const_op_lshr:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #43
-; CHECK-NEXT: sub w0, w8, w0, lsr #31
+; CHECK-NEXT: asr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #43 // =43
; CHECK-NEXT: ret
%sh = lshr i32 %x, 31
%r = sub i32 43, %sh
define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
; CHECK-LABEL: sub_const_op_lshr_vec:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v0.4s, v0.4s, #31
; CHECK-NEXT: movi v1.4s, #42
-; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ssra v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh
define i32 @sub_lshr(i32 %x, i32 %y) {
; CHECK-LABEL: sub_lshr:
; CHECK: # %bb.0:
-; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: subf 3, 3, 4
+; CHECK-NEXT: srawi 3, 3, 31
+; CHECK-NEXT: add 3, 4, 3
; CHECK-NEXT: blr
%sh = lshr i32 %x, 31
%r = sub i32 %y, %sh
; CHECK-NEXT: vspltisw 4, -16
; CHECK-NEXT: vspltisw 5, 15
; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: vsrw 2, 2, 4
-; CHECK-NEXT: vsubuwm 2, 3, 2
+; CHECK-NEXT: vsraw 2, 2, 4
+; CHECK-NEXT: vadduwm 2, 3, 2
; CHECK-NEXT: blr
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> %y, %sh
define i32 @sub_const_op_lshr(i32 %x) {
; CHECK-LABEL: sub_const_op_lshr:
; CHECK: # %bb.0:
-; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: subfic 3, 3, 43
+; CHECK-NEXT: srawi 3, 3, 31
+; CHECK-NEXT: addi 3, 3, 43
; CHECK-NEXT: blr
%sh = lshr i32 %x, 31
%r = sub i32 43, %sh
; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l
; CHECK-NEXT: vsubuwm 3, 4, 3
-; CHECK-NEXT: vsrw 2, 2, 3
+; CHECK-NEXT: vsraw 2, 2, 3
; CHECK-NEXT: lvx 3, 0, 3
-; CHECK-NEXT: vsubuwm 2, 3, 2
+; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh
define i32 @add_sext_ifneg(i32 %x) {
; CHECK-LABEL: add_sext_ifneg:
; CHECK: # %bb.0:
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 0
%e = sext i1 %c to i32
define i32 @sel_ifneg_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifneg_fval_bigger:
; CHECK: # %bb.0:
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp slt i32 %x, 0
%r = select i1 %c, i32 41, i32 42
define i32 @sub_lshr(i32 %x, i32 %y) {
; CHECK-LABEL: sub_lshr:
; CHECK: # %bb.0:
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: subl %edi, %esi
-; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal (%rdi,%rsi), %eax
; CHECK-NEXT: retq
%sh = lshr i32 %x, 31
%r = sub i32 %y, %sh
define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: sub_lshr_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: psrld $31, %xmm0
-; CHECK-NEXT: psubd %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: paddd %xmm1, %xmm0
; CHECK-NEXT: retq
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> %y, %sh
define i32 @sub_const_op_lshr(i32 %x) {
; CHECK-LABEL: sub_const_op_lshr:
; CHECK: # %bb.0:
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: xorl $43, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 43(%rdi), %eax
; CHECK-NEXT: retq
%sh = lshr i32 %x, 31
%r = sub i32 43, %sh
define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) {
; CHECK-LABEL: sub_const_op_lshr_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: psrld $31, %xmm0
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
-; CHECK-NEXT: psubd %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh