if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
return V;
+ // Hoist a one-use add of a constant (scalar or vector) out of the sub: (x + C) - y -> (x - y) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N0.getOperand(1))) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+ }
+
// If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
// rather than 'sub 0/1' (the sext should get folded).
// sub X, (zext i1 Y) --> add X, (sext i1 Y)
define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32
-; CHECK-NEXT: sub w8, w8, w1
+; CHECK-NEXT: neg w8, w1
; CHECK-NEXT: sub w8, w8, w2
; CHECK-NEXT: lsr w0, w0, w8
; CHECK-NEXT: ret
define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64
-; CHECK-NEXT: sub x8, x8, x1
+; CHECK-NEXT: neg x8, x1
; CHECK-NEXT: sub x8, x8, x2
; CHECK-NEXT: lsr x0, x0, x8
; CHECK-NEXT: ret
; CHECK-LABEL: sink_add_of_const_to_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
-; CHECK-NEXT: add w8, w8, #32 // =32
-; CHECK-NEXT: sub w0, w8, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: add w0, w8, #32 // =32
; CHECK-NEXT: ret
%t0 = sub i32 %a, %b
%t1 = add i32 %t0, 32 ; constant always on RHS
; CHECK-LABEL: sink_sub_of_const_to_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
-; CHECK-NEXT: sub w8, w8, #32 // =32
-; CHECK-NEXT: sub w0, w8, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: sub w0, w8, #32 // =32
; CHECK-NEXT: ret
%t0 = sub i32 %a, %b
%t1 = sub i32 %t0, 32
; CHECK-LABEL: sink_sub_from_const_to_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: add w8, w8, #32 // =32
-; CHECK-NEXT: sub w0, w8, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: add w0, w8, #32 // =32
; CHECK-NEXT: ret
%t0 = sub i32 %a, %b
%t1 = sub i32 32, %t0
; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %a, %b
%t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
; CHECK-NEXT: adrp x8, .LCPI22_0
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI22_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
; CHECK-NEXT: ret
%t0 = sub <4 x i32> %a, %b
%t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
; VARIANT0-NEXT: v_mov_b32_e32 v2, 0
; VARIANT0-NEXT: s_waitcnt lgkmcnt(0)
; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT: s_add_i32 s2, s2, -1
-; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; VARIANT0-NEXT: s_waitcnt expcnt(0)
+; VARIANT0-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT0-NEXT: s_waitcnt vmcnt(0)
; VARIANT0-NEXT: s_barrier
-; VARIANT0-NEXT: v_sub_i32_e32 v3, vcc, s2, v0
+; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, -1, v0
; VARIANT0-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; VARIANT0-NEXT: v_lshl_b64 v[3:4], v[3:4], 2
; VARIANT0-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
; VARIANT1-NEXT: v_mov_b32_e32 v2, 0
; VARIANT1-NEXT: s_waitcnt lgkmcnt(0)
; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT: s_add_i32 s2, s2, -1
+; VARIANT1-NEXT: s_waitcnt expcnt(0)
+; VARIANT1-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
; VARIANT1-NEXT: s_barrier
-; VARIANT1-NEXT: v_sub_i32_e32 v3, vcc, s2, v0
+; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, -1, v0
; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; VARIANT1-NEXT: v_lshl_b64 v[3:4], v[3:4], 2
-; VARIANT1-NEXT: s_waitcnt expcnt(0)
; VARIANT1-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
; VARIANT1-NEXT: s_waitcnt vmcnt(0)
; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
; VARIANT2-NEXT: global_store_dword v[1:2], v0, off
; VARIANT2-NEXT: s_waitcnt vmcnt(0)
; VARIANT2-NEXT: s_barrier
-; VARIANT2-NEXT: s_add_i32 s0, s0, -1
-; VARIANT2-NEXT: v_sub_u32_e32 v3, s0, v0
+; VARIANT2-NEXT: v_sub_u32_e32 v0, s0, v0
+; VARIANT2-NEXT: v_add_u32_e32 v3, -1, v0
; VARIANT2-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; VARIANT2-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4]
; VARIANT2-NEXT: v_mov_b32_e32 v0, s3
; VARIANT3-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
; VARIANT3-NEXT: global_store_dword v[1:2], v0, off
; VARIANT3-NEXT: s_barrier
-; VARIANT3-NEXT: s_add_i32 s0, s0, -1
-; VARIANT3-NEXT: v_sub_u32_e32 v3, s0, v0
+; VARIANT3-NEXT: v_sub_u32_e32 v0, s0, v0
+; VARIANT3-NEXT: v_add_u32_e32 v3, -1, v0
; VARIANT3-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; VARIANT3-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4]
; VARIANT3-NEXT: v_mov_b32_e32 v0, s3
define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
; SSE-LABEL: combine_vec_add_sub_sub:
; SSE: # %bb.0:
-; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: psubd %xmm1, %xmm0
+; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_add_sub_sub:
; AVX: # %bb.0:
-; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%1 = sub <4 x i32> %a, %b
%2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je LBB0_55
; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720
-; CHECK-NEXT: movq %rdx, %r14
-; CHECK-NEXT: movq %rdi, %r15
+; CHECK-NEXT: movq %rdx, %rbx
+; CHECK-NEXT: movq %rdi, %rbp
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: cmpq %rax, %rcx
; CHECK-NEXT: movl $32, %esi
; CHECK-NEXT: callq _memset
; CHECK-NEXT: LBB0_8: ## %while.body.preheader
-; CHECK-NEXT: imulq $1040, %r14, %rax ## imm = 0x410
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410
; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx
; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx
-; CHECK-NEXT: movl $1, %r14d
+; CHECK-NEXT: movl $1, %r15d
; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax
; CHECK-NEXT: movb $1, %cl
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne LBB0_9
; CHECK-NEXT: ## %bb.10: ## %do.end
-; CHECK-NEXT: xorl %r12d, %r12d
-; CHECK-NEXT: testb %r12b, %r12b
+; CHECK-NEXT: xorl %r14d, %r14d
+; CHECK-NEXT: testb %r14b, %r14b
; CHECK-NEXT: jne LBB0_11
; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %r13d, %r13d
; CHECK-NEXT: jmp LBB0_13
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_14: ## %while.body200
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: leal 1(%r12), %eax
+; CHECK-NEXT: leal 1(%r14), %eax
; CHECK-NEXT: cmpl $21, %eax
; CHECK-NEXT: ja LBB0_20
; CHECK-NEXT: ## %bb.15: ## %while.body200
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $-1, %ecx
+; CHECK-NEXT: movl $-1, %r13d
; CHECK-NEXT: movslq (%rsi,%rax,4), %rax
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: jmpq *%rax
; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $1, %ecx
+; CHECK-NEXT: movl $1, %r13d
; CHECK-NEXT: jmp LBB0_21
; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: movl %r12d, %ecx
+; CHECK-NEXT: movl %r14d, %r13d
; CHECK-NEXT: jne LBB0_21
; CHECK-NEXT: jmp LBB0_55
; CHECK-NEXT: LBB0_26: ## %sw.bb474
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: ## implicit-def: $r13
+; CHECK-NEXT: ## implicit-def: $r12
; CHECK-NEXT: jne LBB0_34
; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: ## implicit-def: $r13
+; CHECK-NEXT: ## implicit-def: $r12
; CHECK-NEXT: jne LBB0_34
; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: js LBB0_55
; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT: movq %rax, %r13
+; CHECK-NEXT: movq %rax, %r12
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: jne LBB0_32
; CHECK-NEXT: ## %bb.31: ## %lor.rhs500
; CHECK-NEXT: je LBB0_34
; CHECK-NEXT: LBB0_32: ## %do.body479.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT: leaq 1(%r13), %rax
+; CHECK-NEXT: leaq 1(%r12), %rax
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: jne LBB0_29
; CHECK-NEXT: ## %bb.33: ## %if.end517.loopexitsplit
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: incq %r13
+; CHECK-NEXT: incq %r12
; CHECK-NEXT: LBB0_34: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; CHECK-NEXT: addl $-324, %eax ## imm = 0xFEBC
+; CHECK-NEXT: leal -324(%r13), %eax
; CHECK-NEXT: cmpl $59, %eax
; CHECK-NEXT: ja LBB0_35
; CHECK-NEXT: ## %bb.57: ## %if.end517
; CHECK-NEXT: jb LBB0_38
; CHECK-NEXT: LBB0_35: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmpl $11, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-NEXT: cmpl $11, %r13d
; CHECK-NEXT: je LBB0_38
; CHECK-NEXT: ## %bb.36: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmpl $24, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-NEXT: cmpl $24, %r13d
; CHECK-NEXT: je LBB0_38
; CHECK-NEXT: ## %bb.37: ## %if.then532
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: movb $0, (%r13)
-; CHECK-NEXT: movl %r12d, %ecx
+; CHECK-NEXT: movb $0, (%r12)
+; CHECK-NEXT: movl %r14d, %r13d
; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
; CHECK-NEXT: jmp LBB0_21
; CHECK-NEXT: jb LBB0_55
; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: movl $268, %ecx ## imm = 0x10C
+; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C
; CHECK-NEXT: jmp LBB0_21
; CHECK-NEXT: LBB0_19: ## %sw.bb243
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: movl $2, %r13d
; CHECK-NEXT: jmp LBB0_21
; CHECK-NEXT: LBB0_40: ## %sw.bb566
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $20, %ecx
+; CHECK-NEXT: movl $20, %r13d
; CHECK-NEXT: jmp LBB0_21
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_13: ## %while.body200
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_29 Depth 2
; CHECK-NEXT: ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT: leal -268(%r12), %eax
+; CHECK-NEXT: leal -268(%r14), %eax
; CHECK-NEXT: cmpl $105, %eax
; CHECK-NEXT: ja LBB0_14
; CHECK-NEXT: ## %bb.56: ## %while.body200
; CHECK-NEXT: jmpq *%rax
; CHECK-NEXT: LBB0_20: ## %sw.bb256
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl %r12d, %ecx
+; CHECK-NEXT: movl %r14d, %r13d
; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: decl %r14d
-; CHECK-NEXT: testl %r14d, %r14d
-; CHECK-NEXT: movl %ecx, %r12d
+; CHECK-NEXT: decl %r15d
+; CHECK-NEXT: testl %r15d, %r15d
+; CHECK-NEXT: movl %r13d, %r14d
; CHECK-NEXT: jg LBB0_13
; CHECK-NEXT: jmp LBB0_22
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: jmp LBB0_25
; CHECK-NEXT: LBB0_11:
; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %r13d, %r13d
; CHECK-NEXT: LBB0_22: ## %while.end1465
-; CHECK-NEXT: incl %ecx
-; CHECK-NEXT: cmpl $16, %ecx
+; CHECK-NEXT: incl %r13d
+; CHECK-NEXT: cmpl $16, %r13d
; CHECK-NEXT: ja LBB0_50
; CHECK-NEXT: ## %bb.23: ## %while.end1465
; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT: btl %ecx, %eax
+; CHECK-NEXT: btl %r13d, %eax
; CHECK-NEXT: jae LBB0_50
; CHECK-NEXT: ## %bb.24:
-; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: xorl %ebp, %ebp
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
; CHECK-NEXT: LBB0_48: ## %if.then1477
; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: callq _write
-; CHECK-NEXT: addq $8189, %r15 ## imm = 0x1FFD
-; CHECK-NEXT: subq %rbx, %r15
-; CHECK-NEXT: addq _syHistory@{{.*}}(%rip), %r15
+; CHECK-NEXT: subq %rbp, %rbx
+; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax
+; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_49: ## %for.body1723
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: decq %r15
+; CHECK-NEXT: decq %rax
; CHECK-NEXT: jmp LBB0_49
; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader
; CHECK-NEXT: movl $512, %eax ## imm = 0x200
; CHECK-NEXT: LBB0_55: ## %if.then.i
; CHECK-NEXT: ud2
; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit
-; CHECK-NEXT: movq %r15, %rbx
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT: movq %rbx, %rbp
; CHECK-NEXT: jmp LBB0_48
; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader
; CHECK-NEXT: xorl %eax, %eax
; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl $32, %ecx
+; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: # kill: def $cl killed $cl killed $ecx
;
; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
; X64: # %bb.0:
+; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: movl $32, %ecx
-; X64-NEXT: subl %esi, %ecx
+; X64-NEXT: negl %ecx
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %eax
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl $64, %ecx
+; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addb $64, %cl
; X32-NEXT: movl %esi, %edx
; X32-NEXT: shrl %cl, %edx
; X32-NEXT: shrdl %cl, %esi, %eax
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: addl $32, %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: addl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: sink_add_of_const_to_sub:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: subl %esi, %edi
+; X64-NEXT: subl %edx, %edi
; X64-NEXT: leal 32(%rdi), %eax
-; X64-NEXT: subl %edx, %eax
; X64-NEXT: retq
%t0 = sub i32 %a, %b
%t1 = add i32 %t0, 32 ; constant always on RHS
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: addl $-32, %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: addl $-32, %eax
; X32-NEXT: retl
;
; X64-LABEL: sink_sub_of_const_to_sub:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: subl %esi, %edi
+; X64-NEXT: subl %edx, %edi
; X64-NEXT: leal -32(%rdi), %eax
-; X64-NEXT: subl %edx, %eax
; X64-NEXT: retq
%t0 = sub i32 %a, %b
%t1 = sub i32 %t0, 32
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: addl $32, %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: addl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: sink_sub_from_const_to_sub:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: subl %edi, %esi
+; X64-NEXT: subl %edx, %esi
; X64-NEXT: leal 32(%rsi), %eax
-; X64-NEXT: subl %edx, %eax
; X64-NEXT: retq
%t0 = sub i32 %a, %b
%t1 = sub i32 32, %t0
; X32-LABEL: vec_sink_add_of_const_to_sub:
; X32: # %bb.0:
; X32-NEXT: psubd %xmm1, %xmm0
-; X32-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X32-NEXT: psubd %xmm2, %xmm0
+; X32-NEXT: paddd {{\.LCPI.*}}, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: vec_sink_add_of_const_to_sub:
; X64: # %bb.0:
; X64-NEXT: psubd %xmm1, %xmm0
-; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: psubd %xmm2, %xmm0
+; X64-NEXT: paddd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %a, %b
%t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
; X32-LABEL: vec_sink_sub_from_const_to_sub:
; X32: # %bb.0:
; X32-NEXT: psubd %xmm0, %xmm1
-; X32-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X32-NEXT: psubd %xmm2, %xmm1
+; X32-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: vec_sink_sub_from_const_to_sub:
; X64: # %bb.0:
; X64-NEXT: psubd %xmm0, %xmm1
-; X64-NEXT: paddd {{.*}}(%rip), %xmm1
; X64-NEXT: psubd %xmm2, %xmm1
+; X64-NEXT: paddd {{.*}}(%rip), %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
%t0 = sub <4 x i32> %a, %b
; CHECK-NEXT: subq %rax, %rsi
; CHECK-NEXT: movq (%rdx), %rax
; CHECK-NEXT: movswl 8(%rdi), %edx
-; CHECK-NEXT: movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B
; CHECK-NEXT: movswl (%rax,%rsi,2), %eax
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: imull %edx, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: addl $2138875574, %eax # imm = 0x7F7CA6B6
; CHECK-NEXT: cmpl $-8608074, %eax # imm = 0xFF7CA6B6
-; CHECK-NEXT: movslq %eax, %r8
+; CHECK-NEXT: movslq %eax, %rdi
; CHECK-NEXT: setl %dl
; CHECK-NEXT: cmpl $2138875573, %eax # imm = 0x7F7CA6B5
-; CHECK-NEXT: movq %r8, %r9
+; CHECK-NEXT: movq %rdi, %r8
; CHECK-NEXT: leal -1(%rdx,%rdx), %edx
; CHECK-NEXT: cmovlel %edx, %esi
-; CHECK-NEXT: subq %rax, %r9
-; CHECK-NEXT: addq %r8, %rdi
+; CHECK-NEXT: subq %rax, %r8
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $1, %esi
-; CHECK-NEXT: cmovneq %rax, %r9
-; CHECK-NEXT: testl %r8d, %r8d
-; CHECK-NEXT: cmovnsq %rax, %r9
-; CHECK-NEXT: movabsq $-5089792279245435153, %rax # imm = 0xB95D6CE38F0CCEEF
-; CHECK-NEXT: subq %r9, %rdi
-; CHECK-NEXT: addq (%rcx), %rdi
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: cmovneq %rax, %r8
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsq %rax, %r8
+; CHECK-NEXT: movq (%rcx), %rax
+; CHECK-NEXT: subq %r8, %rdi
+; CHECK-NEXT: leaq -2138875574(%rax,%rdi), %rax
; CHECK-NEXT: movq %rax, (%rcx)
; CHECK-NEXT: retq
entry: