}
}
+ // (x - y) + -1 -> add (xor y, -1), x
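+ // This holds because ~y == -1 - y in two's complement, so
+ // (x - y) + -1 == x + (-1 - y) == x + ~y.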
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isAllOnesOrAllOnesSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
return Combined;
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
return V;
+ // (x - y) - 1 -> add (xor y, -1), x
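+ // Same identity as above: x + ~y == x - y - 1 == (x - y) - 1.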
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
// Hoist one-use addition by constant: (x + C) - y -> (x - y) + C
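// Keeping the constant as the outermost operand lets later combines fold it.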
if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
    isConstantOrConstantVector(N0.getOperand(1))) {
  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
  return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
}

define i32 @add_of_not(i32 %x, i32 %y) {
; CHECK-LABEL: add_of_not:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w0, w1
-; CHECK-NEXT: sub w0, w8, #1 // =1
+; CHECK-NEXT: mvn w8, w1
+; CHECK-NEXT: add w0, w8, w0
; CHECK-NEXT: ret
  %t0 = sub i32 %x, %y
  %r = add i32 %t0, -1
  ret i32 %r
}

define i32 @add_of_not_decrement(i32 %x, i32 %y) {
; CHECK-LABEL: add_of_not_decrement:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub w8, w0, w1
-; CHECK-NEXT: sub w0, w8, #1 // =1
+; CHECK-NEXT: mvn w8, w1
+; CHECK-NEXT: add w0, w8, w0
; CHECK-NEXT: ret
  %t0 = sub i32 %x, %y
  %r = sub i32 %t0, 1
  ret i32 %r
}

define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vec_add_of_not:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mvn v1.16b, v1.16b
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %x, %y
  %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vec_add_of_not_decrement:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mvn v1.16b, v1.16b
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
  %t0 = sub <4 x i32> %x, %y
  %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %r
}

; VARIANT0: ; %bb.0: ; %entry
; VARIANT0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; VARIANT0-NEXT: s_load_dword s2, s[0:1], 0xb
+; VARIANT0-NEXT: v_not_b32_e32 v3, v0
; VARIANT0-NEXT: s_mov_b32 s7, 0xf000
; VARIANT0-NEXT: s_mov_b32 s6, 0
; VARIANT0-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; VARIANT0-NEXT: v_mov_b32_e32 v2, 0
; VARIANT0-NEXT: s_waitcnt lgkmcnt(0)
; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT: s_waitcnt expcnt(0)
-; VARIANT0-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
-; VARIANT0-NEXT: s_waitcnt vmcnt(0)
+; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; VARIANT0-NEXT: s_barrier
-; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, -1, v0
; VARIANT0-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; VARIANT0-NEXT: v_lshl_b64 v[3:4], v[3:4], 2
; VARIANT0-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
; VARIANT1: ; %bb.0: ; %entry
; VARIANT1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; VARIANT1-NEXT: s_load_dword s2, s[0:1], 0xb
+; VARIANT1-NEXT: v_not_b32_e32 v3, v0
; VARIANT1-NEXT: s_mov_b32 s7, 0xf000
; VARIANT1-NEXT: s_mov_b32 s6, 0
; VARIANT1-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; VARIANT1-NEXT: v_mov_b32_e32 v2, 0
; VARIANT1-NEXT: s_waitcnt lgkmcnt(0)
; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT: s_waitcnt expcnt(0)
-; VARIANT1-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, s2, v3
; VARIANT1-NEXT: s_barrier
-; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, -1, v0
; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; VARIANT1-NEXT: v_lshl_b64 v[3:4], v[3:4], 2
+; VARIANT1-NEXT: s_waitcnt expcnt(0)
; VARIANT1-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
; VARIANT1-NEXT: s_waitcnt vmcnt(0)
; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
; VARIANT2-NEXT: global_store_dword v[1:2], v0, off
; VARIANT2-NEXT: s_waitcnt vmcnt(0)
; VARIANT2-NEXT: s_barrier
-; VARIANT2-NEXT: v_sub_u32_e32 v0, s0, v0
-; VARIANT2-NEXT: v_add_u32_e32 v3, -1, v0
+; VARIANT2-NEXT: v_xad_u32 v3, v0, -1, s0
; VARIANT2-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; VARIANT2-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4]
; VARIANT2-NEXT: v_mov_b32_e32 v0, s3
; VARIANT3-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
; VARIANT3-NEXT: global_store_dword v[1:2], v0, off
; VARIANT3-NEXT: s_barrier
-; VARIANT3-NEXT: v_sub_u32_e32 v0, s0, v0
-; VARIANT3-NEXT: v_add_u32_e32 v3, -1, v0
+; VARIANT3-NEXT: v_xad_u32 v3, v0, -1, s0
; VARIANT3-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; VARIANT3-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4]
; VARIANT3-NEXT: v_mov_b32_e32 v0, s3
define i32 @add_of_not(i32 %x, i32 %y) {
; X32-LABEL: add_of_not:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: decl %eax
+; X32-NEXT: notl %eax
+; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
;
; X64-LIN-LABEL: add_of_not:
; X64-LIN: # %bb.0:
+; X64-LIN-NEXT: # kill: def $esi killed $esi def $rsi
; X64-LIN-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT: subl %esi, %edi
-; X64-LIN-NEXT: leal -1(%rdi), %eax
+; X64-LIN-NEXT: notl %esi
+; X64-LIN-NEXT: leal (%rsi,%rdi), %eax
; X64-LIN-NEXT: retq
;
; X64-WIN-LABEL: add_of_not:
; X64-WIN: # %bb.0:
+; X64-WIN-NEXT: # kill: def $edx killed $edx def $rdx
; X64-WIN-NEXT: # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT: subl %edx, %ecx
-; X64-WIN-NEXT: leal -1(%rcx), %eax
+; X64-WIN-NEXT: notl %edx
+; X64-WIN-NEXT: leal (%rdx,%rcx), %eax
; X64-WIN-NEXT: retq
  %t0 = sub i32 %x, %y
  %r = add i32 %t0, -1
  ret i32 %r
}

define i32 @add_of_not_decrement(i32 %x, i32 %y) {
; X32-LABEL: add_of_not_decrement:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: decl %eax
+; X32-NEXT: notl %eax
+; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
;
; X64-LIN-LABEL: add_of_not_decrement:
; X64-LIN: # %bb.0:
+; X64-LIN-NEXT: # kill: def $esi killed $esi def $rsi
; X64-LIN-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT: subl %esi, %edi
-; X64-LIN-NEXT: leal -1(%rdi), %eax
+; X64-LIN-NEXT: notl %esi
+; X64-LIN-NEXT: leal (%rsi,%rdi), %eax
; X64-LIN-NEXT: retq
;
; X64-WIN-LABEL: add_of_not_decrement:
; X64-WIN: # %bb.0:
+; X64-WIN-NEXT: # kill: def $edx killed $edx def $rdx
; X64-WIN-NEXT: # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT: subl %edx, %ecx
-; X64-WIN-NEXT: leal -1(%rcx), %eax
+; X64-WIN-NEXT: notl %edx
+; X64-WIN-NEXT: leal (%rdx,%rcx), %eax
; X64-WIN-NEXT: retq
  %t0 = sub i32 %x, %y
  %r = sub i32 %t0, 1
  ret i32 %r
}

define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: vec_add_of_not:
; X32: # %bb.0:
-; X32-NEXT: psubd %xmm1, %xmm0
-; X32-NEXT: pcmpeqd %xmm1, %xmm1
-; X32-NEXT: paddd %xmm1, %xmm0
+; X32-NEXT: pcmpeqd %xmm2, %xmm2
+; X32-NEXT: pxor %xmm1, %xmm2
+; X32-NEXT: paddd %xmm2, %xmm0
; X32-NEXT: retl
;
; X64-LIN-LABEL: vec_add_of_not:
; X64-LIN: # %bb.0:
-; X64-LIN-NEXT: psubd %xmm1, %xmm0
-; X64-LIN-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT: paddd %xmm1, %xmm0
+; X64-LIN-NEXT: pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT: pxor %xmm1, %xmm2
+; X64-LIN-NEXT: paddd %xmm2, %xmm0
; X64-LIN-NEXT: retq
;
; X64-WIN-LABEL: vec_add_of_not:
; X64-WIN: # %bb.0:
-; X64-WIN-NEXT: movdqa (%rcx), %xmm1
-; X64-WIN-NEXT: psubd (%rdx), %xmm1
; X64-WIN-NEXT: pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT: paddd %xmm1, %xmm0
+; X64-WIN-NEXT: pxor (%rdx), %xmm0
+; X64-WIN-NEXT: paddd (%rcx), %xmm0
; X64-WIN-NEXT: retq
  %t0 = sub <4 x i32> %x, %y
  %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: vec_add_of_not_decrement:
; X32: # %bb.0:
-; X32-NEXT: psubd %xmm1, %xmm0
-; X32-NEXT: pcmpeqd %xmm1, %xmm1
-; X32-NEXT: paddd %xmm1, %xmm0
+; X32-NEXT: pcmpeqd %xmm2, %xmm2
+; X32-NEXT: pxor %xmm1, %xmm2
+; X32-NEXT: paddd %xmm2, %xmm0
; X32-NEXT: retl
;
; X64-LIN-LABEL: vec_add_of_not_decrement:
; X64-LIN: # %bb.0:
-; X64-LIN-NEXT: psubd %xmm1, %xmm0
-; X64-LIN-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT: paddd %xmm1, %xmm0
+; X64-LIN-NEXT: pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT: pxor %xmm1, %xmm2
+; X64-LIN-NEXT: paddd %xmm2, %xmm0
; X64-LIN-NEXT: retq
;
; X64-WIN-LABEL: vec_add_of_not_decrement:
; X64-WIN: # %bb.0:
-; X64-WIN-NEXT: movdqa (%rcx), %xmm1
-; X64-WIN-NEXT: psubd (%rdx), %xmm1
; X64-WIN-NEXT: pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT: paddd %xmm1, %xmm0
+; X64-WIN-NEXT: pxor (%rdx), %xmm0
+; X64-WIN-NEXT: paddd (%rcx), %xmm0
; X64-WIN-NEXT: retq
  %t0 = sub <4 x i32> %x, %y
  %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %r
}