; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $48, %rsp
; WIN64-NEXT: .seh_stackalloc 48
-; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 32
-; WIN64-NEXT: vmovaps %xmm6, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 16
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT: movq %rax, %rcx
; WIN64-NEXT: movq %rax, %rdx
; WIN64-NEXT: movq %rax, %rdi
-; WIN64-NEXT: movq %rax, %rsi
; WIN64-NEXT: movq %rax, %r8
; WIN64-NEXT: movq %rax, %r9
; WIN64-NEXT: movq %rax, %r10
; WIN64-NEXT: movq %rax, %r12
; WIN64-NEXT: movq %rax, %r14
; WIN64-NEXT: movq %rax, %r15
+; WIN64-NEXT: movq %rax, %rsi
; WIN64-NEXT: callq test_argv64i1
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $48, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; LINUXOSX64-NEXT: movq %rax, %rcx
; LINUXOSX64-NEXT: movq %rax, %rdx
; LINUXOSX64-NEXT: movq %rax, %rdi
-; LINUXOSX64-NEXT: movq %rax, %rsi
; LINUXOSX64-NEXT: movq %rax, %r8
; LINUXOSX64-NEXT: movq %rax, %r9
; LINUXOSX64-NEXT: movq %rax, %r12
; LINUXOSX64-NEXT: movq %rax, %r13
; LINUXOSX64-NEXT: movq %rax, %r14
; LINUXOSX64-NEXT: movq %rax, %r15
+; LINUXOSX64-NEXT: movq %rax, %rsi
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: pushq %rax
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: kmovq %rax, %k0
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
-; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
-; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
-; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
+; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT: calll _test_argv32i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: vzeroupper
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
-; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT: callq test_argv32i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv32i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: callq test_retv32i1
; WIN64-NEXT: incl %eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
-; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
-; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
-; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
+; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv16i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
-; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv16i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv16i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
-; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
-; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
-; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
+; X32-NEXT: vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32-NEXT: vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X32-NEXT: vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv8i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
-; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
+; X32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
-; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
-; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; LINUXOSX64-NEXT: vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv8i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
-; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
+; LINUXOSX64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv8i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
-; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
+; WIN64-NEXT: vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: vpmovm2w %k0, %zmm0
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
-; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
+; WIN64-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i386-unknown-linux-gnu -o - %s | FileCheck %s
+
+; To match GCC's behavior in assigning 64-bit values to a 32-bit
+; register, we bind a subsequence of 2 registers, starting with the
+; explicitly given register, from the following sequence: EAX, EDX,
+; ECX, EBX, ESI, EDI, EBP, ESP, to the value. There is no wrapping
+; around the end of the sequence, so this will fail when ESP is given.
+
+define dso_local i64 @test_eax(i64 %in) local_unnamed_addr nounwind {
+; CHECK-LABEL: test_eax:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl $-1985229329, %eax # imm = 0x89ABCDEF
+; CHECK-NEXT: movl $19088743, %edx # imm = 0x1234567
+; CHECK-NEXT: #APP
+; CHECK-NEXT: movl %eax, %eax
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: sarl $31, %edx
+; CHECK-NEXT: retl
+entry:
+ %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{eax},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895)
+ %conv = trunc i64 %0 to i32
+ %add = add nsw i32 %conv, 3
+ %conv1 = sext i32 %add to i64
+ ret i64 %conv1
+}
+
+define dso_local i64 @test_edx(i64 %in) local_unnamed_addr nounwind {
+; CHECK-LABEL: test_edx:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl $-1985229329, %edx # imm = 0x89ABCDEF
+; CHECK-NEXT: movl $19088743, %ecx # imm = 0x1234567
+; CHECK-NEXT: #APP
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: sarl $31, %edx
+; CHECK-NEXT: retl
+entry:
+ %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{edx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895)
+ %conv = trunc i64 %0 to i32
+ %add = add nsw i32 %conv, 3
+ %conv1 = sext i32 %add to i64
+ ret i64 %conv1
+}
+
+define dso_local i64 @test_ecx(i64 %in) local_unnamed_addr nounwind {
+; CHECK-LABEL: test_ecx:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: movl $-1985229329, %ecx # imm = 0x89ABCDEF
+; CHECK-NEXT: movl $19088743, %ebx # imm = 0x1234567
+; CHECK-NEXT: #APP
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: sarl $31, %edx
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: retl
+entry:
+ %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ecx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895)
+ %conv = trunc i64 %0 to i32
+ %add = add nsw i32 %conv, 3
+ %conv1 = sext i32 %add to i64
+ ret i64 %conv1
+}
+
+define dso_local i64 @test_ebx(i64 %in) local_unnamed_addr nounwind {
+; CHECK-LABEL: test_ebx:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl $-1985229329, %ebx # imm = 0x89ABCDEF
+; CHECK-NEXT: movl $19088743, %esi # imm = 0x1234567
+; CHECK-NEXT: #APP
+; CHECK-NEXT: movl %ebx, %eax
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: sarl $31, %edx
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: retl
+entry:
+ %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ebx},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895)
+ %conv = trunc i64 %0 to i32
+ %add = add nsw i32 %conv, 3
+ %conv1 = sext i32 %add to i64
+ ret i64 %conv1
+}
+
+define dso_local i64 @test_esi(i64 %in) local_unnamed_addr nounwind {
+; CHECK-LABEL: test_esi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl $-1985229329, %esi # imm = 0x89ABCDEF
+; CHECK-NEXT: movl $19088743, %edi # imm = 0x1234567
+; CHECK-NEXT: #APP
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: sarl $31, %edx
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: retl
+entry:
+ %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{esi},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895)
+ %conv = trunc i64 %0 to i32
+ %add = add nsw i32 %conv, 3
+ %conv1 = sext i32 %add to i64
+ ret i64 %conv1
+}
+
+define dso_local i64 @test_edi(i64 %in) local_unnamed_addr nounwind {
+; CHECK-LABEL: test_edi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: movl $-1985229329, %edi # imm = 0x89ABCDEF
+; CHECK-NEXT: movl $19088743, %ebp # imm = 0x1234567
+; CHECK-NEXT: #APP
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: sarl $31, %edx
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+entry:
+ %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{edi},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895)
+ %conv = trunc i64 %0 to i32
+ %add = add nsw i32 %conv, 3
+ %conv1 = sext i32 %add to i64
+ ret i64 %conv1
+}
+
+define dso_local i64 @test_ebp(i64 %in) local_unnamed_addr nounwind {
+; CHECK-LABEL: test_ebp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl $19088743, %esp # imm = 0x1234567
+; CHECK-NEXT: movl $-1985229329, %ebp # imm = 0x89ABCDEF
+; CHECK-NEXT: #APP
+; CHECK-NEXT: movl %ebp, %eax
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: sarl $31, %edx
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+entry:
+ %0 = tail call i64 asm sideeffect "mov $1, $0", "=r,{ebp},~{dirflag},~{fpsr},~{flags}"(i64 81985529216486895)
+ %conv = trunc i64 %0 to i32
+ %add = add nsw i32 %conv, 3
+ %conv1 = sext i32 %add to i64
+ ret i64 %conv1
+}
+
; CHECK-BASELINE-NEXT: movq %rcx, %r15
; CHECK-BASELINE-NEXT: movq %rsi, %r14
; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-BASELINE-NEXT: movb 15(%rcx), %al
-; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 16(%rcx), %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 17(%rcx), %al
; CHECK-BASELINE-NEXT: movb 20(%rcx), %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 21(%rcx), %r12b
-; CHECK-BASELINE-NEXT: movb 22(%rcx), %r10b
-; CHECK-BASELINE-NEXT: movb 23(%rcx), %r11b
-; CHECK-BASELINE-NEXT: movb 24(%rcx), %bpl
-; CHECK-BASELINE-NEXT: movb 25(%rcx), %r13b
-; CHECK-BASELINE-NEXT: movb 26(%rcx), %r9b
+; CHECK-BASELINE-NEXT: movb 22(%rcx), %r9b
+; CHECK-BASELINE-NEXT: movb 23(%rcx), %r10b
+; CHECK-BASELINE-NEXT: movb 24(%rcx), %r11b
+; CHECK-BASELINE-NEXT: movb 25(%rcx), %bpl
+; CHECK-BASELINE-NEXT: movb 26(%rcx), %r13b
; CHECK-BASELINE-NEXT: movb 27(%rcx), %r8b
; CHECK-BASELINE-NEXT: movb 28(%rcx), %dil
; CHECK-BASELINE-NEXT: movb 29(%rcx), %sil
; CHECK-BASELINE-NEXT: orb %al, %r8b
; CHECK-BASELINE-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 26(%r14), %al
-; CHECK-BASELINE-NEXT: andb %r9b, %al
-; CHECK-BASELINE-NEXT: notb %r9b
-; CHECK-BASELINE-NEXT: andb 26(%rdx), %r9b
-; CHECK-BASELINE-NEXT: orb %al, %r9b
-; CHECK-BASELINE-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 25(%r14), %al
; CHECK-BASELINE-NEXT: andb %r13b, %al
; CHECK-BASELINE-NEXT: notb %r13b
-; CHECK-BASELINE-NEXT: andb 25(%rdx), %r13b
+; CHECK-BASELINE-NEXT: andb 26(%rdx), %r13b
; CHECK-BASELINE-NEXT: orb %al, %r13b
; CHECK-BASELINE-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 24(%r14), %al
+; CHECK-BASELINE-NEXT: movb 25(%r14), %al
; CHECK-BASELINE-NEXT: andb %bpl, %al
; CHECK-BASELINE-NEXT: notb %bpl
-; CHECK-BASELINE-NEXT: andb 24(%rdx), %bpl
+; CHECK-BASELINE-NEXT: andb 25(%rdx), %bpl
; CHECK-BASELINE-NEXT: orb %al, %bpl
; CHECK-BASELINE-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 23(%r14), %al
+; CHECK-BASELINE-NEXT: movb 24(%r14), %al
; CHECK-BASELINE-NEXT: andb %r11b, %al
; CHECK-BASELINE-NEXT: notb %r11b
-; CHECK-BASELINE-NEXT: andb 23(%rdx), %r11b
+; CHECK-BASELINE-NEXT: andb 24(%rdx), %r11b
; CHECK-BASELINE-NEXT: orb %al, %r11b
; CHECK-BASELINE-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 22(%r14), %al
+; CHECK-BASELINE-NEXT: movb 23(%r14), %al
; CHECK-BASELINE-NEXT: andb %r10b, %al
; CHECK-BASELINE-NEXT: notb %r10b
-; CHECK-BASELINE-NEXT: andb 22(%rdx), %r10b
+; CHECK-BASELINE-NEXT: andb 23(%rdx), %r10b
; CHECK-BASELINE-NEXT: orb %al, %r10b
; CHECK-BASELINE-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-BASELINE-NEXT: movb 22(%r14), %al
+; CHECK-BASELINE-NEXT: andb %r9b, %al
+; CHECK-BASELINE-NEXT: notb %r9b
+; CHECK-BASELINE-NEXT: andb 22(%rdx), %r9b
+; CHECK-BASELINE-NEXT: orb %al, %r9b
+; CHECK-BASELINE-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 21(%r14), %al
; CHECK-BASELINE-NEXT: andb %r12b, %al
; CHECK-BASELINE-NEXT: notb %r12b
; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-BASELINE-NEXT: andb %cl, %al
; CHECK-BASELINE-NEXT: notb %cl
+; CHECK-BASELINE-NEXT: movq %rdx, %rbx
; CHECK-BASELINE-NEXT: andb 17(%rdx), %cl
; CHECK-BASELINE-NEXT: orb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-BASELINE-NEXT: andb %cl, %al
; CHECK-BASELINE-NEXT: notb %cl
-; CHECK-BASELINE-NEXT: movq %rdx, %rbx
; CHECK-BASELINE-NEXT: andb 16(%rdx), %cl
; CHECK-BASELINE-NEXT: orb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-BASELINE-NEXT: movb 15(%r15), %cl
; CHECK-BASELINE-NEXT: movb 15(%r14), %al
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-BASELINE-NEXT: andb %cl, %al
; CHECK-BASELINE-NEXT: notb %cl
; CHECK-BASELINE-NEXT: andb 15(%rdx), %cl
; CHECK-SSE1-NEXT: movq %rcx, %r15
; CHECK-SSE1-NEXT: movq %rsi, %r14
; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-SSE1-NEXT: movb 15(%rcx), %al
-; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 16(%rcx), %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 17(%rcx), %al
; CHECK-SSE1-NEXT: movb 20(%rcx), %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 21(%rcx), %r12b
-; CHECK-SSE1-NEXT: movb 22(%rcx), %r10b
-; CHECK-SSE1-NEXT: movb 23(%rcx), %r11b
-; CHECK-SSE1-NEXT: movb 24(%rcx), %bpl
-; CHECK-SSE1-NEXT: movb 25(%rcx), %r13b
-; CHECK-SSE1-NEXT: movb 26(%rcx), %r9b
+; CHECK-SSE1-NEXT: movb 22(%rcx), %r9b
+; CHECK-SSE1-NEXT: movb 23(%rcx), %r10b
+; CHECK-SSE1-NEXT: movb 24(%rcx), %r11b
+; CHECK-SSE1-NEXT: movb 25(%rcx), %bpl
+; CHECK-SSE1-NEXT: movb 26(%rcx), %r13b
; CHECK-SSE1-NEXT: movb 27(%rcx), %r8b
; CHECK-SSE1-NEXT: movb 28(%rcx), %dil
; CHECK-SSE1-NEXT: movb 29(%rcx), %sil
; CHECK-SSE1-NEXT: orb %al, %r8b
; CHECK-SSE1-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 26(%r14), %al
-; CHECK-SSE1-NEXT: andb %r9b, %al
-; CHECK-SSE1-NEXT: notb %r9b
-; CHECK-SSE1-NEXT: andb 26(%rdx), %r9b
-; CHECK-SSE1-NEXT: orb %al, %r9b
-; CHECK-SSE1-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 25(%r14), %al
; CHECK-SSE1-NEXT: andb %r13b, %al
; CHECK-SSE1-NEXT: notb %r13b
-; CHECK-SSE1-NEXT: andb 25(%rdx), %r13b
+; CHECK-SSE1-NEXT: andb 26(%rdx), %r13b
; CHECK-SSE1-NEXT: orb %al, %r13b
; CHECK-SSE1-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 24(%r14), %al
+; CHECK-SSE1-NEXT: movb 25(%r14), %al
; CHECK-SSE1-NEXT: andb %bpl, %al
; CHECK-SSE1-NEXT: notb %bpl
-; CHECK-SSE1-NEXT: andb 24(%rdx), %bpl
+; CHECK-SSE1-NEXT: andb 25(%rdx), %bpl
; CHECK-SSE1-NEXT: orb %al, %bpl
; CHECK-SSE1-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 23(%r14), %al
+; CHECK-SSE1-NEXT: movb 24(%r14), %al
; CHECK-SSE1-NEXT: andb %r11b, %al
; CHECK-SSE1-NEXT: notb %r11b
-; CHECK-SSE1-NEXT: andb 23(%rdx), %r11b
+; CHECK-SSE1-NEXT: andb 24(%rdx), %r11b
; CHECK-SSE1-NEXT: orb %al, %r11b
; CHECK-SSE1-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 22(%r14), %al
+; CHECK-SSE1-NEXT: movb 23(%r14), %al
; CHECK-SSE1-NEXT: andb %r10b, %al
; CHECK-SSE1-NEXT: notb %r10b
-; CHECK-SSE1-NEXT: andb 22(%rdx), %r10b
+; CHECK-SSE1-NEXT: andb 23(%rdx), %r10b
; CHECK-SSE1-NEXT: orb %al, %r10b
; CHECK-SSE1-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-SSE1-NEXT: movb 22(%r14), %al
+; CHECK-SSE1-NEXT: andb %r9b, %al
+; CHECK-SSE1-NEXT: notb %r9b
+; CHECK-SSE1-NEXT: andb 22(%rdx), %r9b
+; CHECK-SSE1-NEXT: orb %al, %r9b
+; CHECK-SSE1-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 21(%r14), %al
; CHECK-SSE1-NEXT: andb %r12b, %al
; CHECK-SSE1-NEXT: notb %r12b
; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-SSE1-NEXT: andb %cl, %al
; CHECK-SSE1-NEXT: notb %cl
+; CHECK-SSE1-NEXT: movq %rdx, %rbx
; CHECK-SSE1-NEXT: andb 17(%rdx), %cl
; CHECK-SSE1-NEXT: orb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-SSE1-NEXT: andb %cl, %al
; CHECK-SSE1-NEXT: notb %cl
-; CHECK-SSE1-NEXT: movq %rdx, %rbx
; CHECK-SSE1-NEXT: andb 16(%rdx), %cl
; CHECK-SSE1-NEXT: orb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-SSE1-NEXT: movb 15(%r15), %cl
; CHECK-SSE1-NEXT: movb 15(%r14), %al
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-SSE1-NEXT: andb %cl, %al
; CHECK-SSE1-NEXT: notb %cl
; CHECK-SSE1-NEXT: andb 15(%rdx), %cl
; CHECK-BASELINE-NEXT: movq %rdx, %r13
; CHECK-BASELINE-NEXT: movq %rsi, %rbx
; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-BASELINE-NEXT: movb 16(%rdx), %r12b
-; CHECK-BASELINE-NEXT: movb 15(%rdx), %al
-; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-BASELINE-NEXT: movb 15(%rdx), %r12b
; CHECK-BASELINE-NEXT: movb 14(%rdx), %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 13(%rdx), %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 10(%rdx), %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 9(%rdx), %r10b
-; CHECK-BASELINE-NEXT: movb 8(%rdx), %r11b
-; CHECK-BASELINE-NEXT: movb 7(%rdx), %r9b
+; CHECK-BASELINE-NEXT: movb 9(%rdx), %r9b
+; CHECK-BASELINE-NEXT: movb 8(%rdx), %r10b
+; CHECK-BASELINE-NEXT: movb 7(%rdx), %r11b
; CHECK-BASELINE-NEXT: movb 6(%rdx), %r8b
; CHECK-BASELINE-NEXT: movb 5(%rdx), %bpl
-; CHECK-BASELINE-NEXT: movb 4(%rdx), %dil
-; CHECK-BASELINE-NEXT: movb 3(%rdx), %sil
+; CHECK-BASELINE-NEXT: movb 4(%rdx), %sil
+; CHECK-BASELINE-NEXT: movb 3(%rdx), %dil
; CHECK-BASELINE-NEXT: movb 2(%rdx), %r14b
; CHECK-BASELINE-NEXT: movb (%rdx), %al
; CHECK-BASELINE-NEXT: movb 1(%rdx), %r15b
; CHECK-BASELINE-NEXT: xorb %r14b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 3(%rbx), %al
-; CHECK-BASELINE-NEXT: xorb %sil, %al
+; CHECK-BASELINE-NEXT: xorb %dil, %al
; CHECK-BASELINE-NEXT: andb 3(%rcx), %al
-; CHECK-BASELINE-NEXT: xorb %sil, %al
+; CHECK-BASELINE-NEXT: xorb %dil, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 4(%rbx), %al
-; CHECK-BASELINE-NEXT: xorb %dil, %al
+; CHECK-BASELINE-NEXT: xorb %sil, %al
; CHECK-BASELINE-NEXT: andb 4(%rcx), %al
-; CHECK-BASELINE-NEXT: xorb %dil, %al
+; CHECK-BASELINE-NEXT: xorb %sil, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 5(%rbx), %al
; CHECK-BASELINE-NEXT: xorb %bpl, %al
; CHECK-BASELINE-NEXT: xorb %r8b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 7(%rbx), %al
-; CHECK-BASELINE-NEXT: xorb %r9b, %al
+; CHECK-BASELINE-NEXT: xorb %r11b, %al
; CHECK-BASELINE-NEXT: andb 7(%rcx), %al
-; CHECK-BASELINE-NEXT: xorb %r9b, %al
+; CHECK-BASELINE-NEXT: xorb %r11b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 8(%rbx), %al
-; CHECK-BASELINE-NEXT: xorb %r11b, %al
+; CHECK-BASELINE-NEXT: xorb %r10b, %al
; CHECK-BASELINE-NEXT: andb 8(%rcx), %al
-; CHECK-BASELINE-NEXT: xorb %r11b, %al
+; CHECK-BASELINE-NEXT: xorb %r10b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 9(%rbx), %al
-; CHECK-BASELINE-NEXT: xorb %r10b, %al
+; CHECK-BASELINE-NEXT: xorb %r9b, %al
; CHECK-BASELINE-NEXT: andb 9(%rcx), %al
-; CHECK-BASELINE-NEXT: xorb %r10b, %al
+; CHECK-BASELINE-NEXT: xorb %r9b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 10(%rbx), %dl
; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
; CHECK-BASELINE-NEXT: andb 14(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 15(%rbx), %dl
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
-; CHECK-BASELINE-NEXT: xorb %al, %dl
-; CHECK-BASELINE-NEXT: andb 15(%rcx), %dl
-; CHECK-BASELINE-NEXT: xorb %al, %dl
-; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 16(%rbx), %al
+; CHECK-BASELINE-NEXT: movb 15(%rbx), %al
; CHECK-BASELINE-NEXT: xorb %r12b, %al
-; CHECK-BASELINE-NEXT: andb 16(%rcx), %al
+; CHECK-BASELINE-NEXT: andb 15(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %r12b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-BASELINE-NEXT: movb 16(%r13), %al
+; CHECK-BASELINE-NEXT: movb 16(%rbx), %dl
+; CHECK-BASELINE-NEXT: xorb %al, %dl
+; CHECK-BASELINE-NEXT: andb 16(%rcx), %dl
+; CHECK-BASELINE-NEXT: xorb %al, %dl
+; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 17(%r13), %al
; CHECK-BASELINE-NEXT: movb 17(%rbx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb 19(%rbx), %r12b
; CHECK-BASELINE-NEXT: xorb %al, %r12b
; CHECK-BASELINE-NEXT: andb 19(%rcx), %r12b
-; CHECK-BASELINE-NEXT: movq %rcx, %rdx
; CHECK-BASELINE-NEXT: xorb %al, %r12b
; CHECK-BASELINE-NEXT: movb 20(%r13), %al
-; CHECK-BASELINE-NEXT: movb 20(%rbx), %r14b
-; CHECK-BASELINE-NEXT: xorb %al, %r14b
-; CHECK-BASELINE-NEXT: andb 20(%rcx), %r14b
-; CHECK-BASELINE-NEXT: xorb %al, %r14b
-; CHECK-BASELINE-NEXT: movb 21(%r13), %al
-; CHECK-BASELINE-NEXT: movb 21(%rbx), %r15b
+; CHECK-BASELINE-NEXT: movb 20(%rbx), %r15b
; CHECK-BASELINE-NEXT: xorb %al, %r15b
-; CHECK-BASELINE-NEXT: andb 21(%rcx), %r15b
+; CHECK-BASELINE-NEXT: andb 20(%rcx), %r15b
+; CHECK-BASELINE-NEXT: movq %rcx, %rsi
; CHECK-BASELINE-NEXT: xorb %al, %r15b
+; CHECK-BASELINE-NEXT: movb 21(%r13), %al
+; CHECK-BASELINE-NEXT: movb 21(%rbx), %r14b
+; CHECK-BASELINE-NEXT: xorb %al, %r14b
+; CHECK-BASELINE-NEXT: andb 21(%rcx), %r14b
+; CHECK-BASELINE-NEXT: xorb %al, %r14b
; CHECK-BASELINE-NEXT: movb 22(%r13), %al
; CHECK-BASELINE-NEXT: movb 22(%rbx), %bpl
; CHECK-BASELINE-NEXT: xorb %al, %bpl
; CHECK-BASELINE-NEXT: andb 27(%rcx), %dil
; CHECK-BASELINE-NEXT: xorb %al, %dil
; CHECK-BASELINE-NEXT: movb 28(%r13), %al
-; CHECK-BASELINE-NEXT: movb 28(%rbx), %sil
-; CHECK-BASELINE-NEXT: xorb %al, %sil
-; CHECK-BASELINE-NEXT: andb 28(%rcx), %sil
-; CHECK-BASELINE-NEXT: xorb %al, %sil
+; CHECK-BASELINE-NEXT: movb 28(%rbx), %dl
+; CHECK-BASELINE-NEXT: xorb %al, %dl
+; CHECK-BASELINE-NEXT: andb 28(%rcx), %dl
+; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb 29(%r13), %al
; CHECK-BASELINE-NEXT: movb 29(%rbx), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
-; CHECK-BASELINE-NEXT: andb 29(%rdx), %cl
+; CHECK-BASELINE-NEXT: andb 29(%rsi), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb 30(%r13), %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT: movb 30(%rbx), %al
; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
-; CHECK-BASELINE-NEXT: andb 30(%rdx), %al
+; CHECK-BASELINE-NEXT: andb 30(%rsi), %al
; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb 31(%r13), %r13b
; CHECK-BASELINE-NEXT: movb 31(%rbx), %bl
; CHECK-BASELINE-NEXT: xorb %r13b, %bl
-; CHECK-BASELINE-NEXT: andb 31(%rdx), %bl
+; CHECK-BASELINE-NEXT: andb 31(%rsi), %bl
; CHECK-BASELINE-NEXT: xorb %r13b, %bl
; CHECK-BASELINE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
; CHECK-BASELINE-NEXT: movb %bl, 31(%r13)
; CHECK-BASELINE-NEXT: movb %al, 30(%r13)
; CHECK-BASELINE-NEXT: movb %cl, 29(%r13)
-; CHECK-BASELINE-NEXT: movb %sil, 28(%r13)
+; CHECK-BASELINE-NEXT: movb %dl, 28(%r13)
; CHECK-BASELINE-NEXT: movb %dil, 27(%r13)
; CHECK-BASELINE-NEXT: movb %r8b, 26(%r13)
; CHECK-BASELINE-NEXT: movb %r9b, 25(%r13)
; CHECK-BASELINE-NEXT: movb %r10b, 24(%r13)
; CHECK-BASELINE-NEXT: movb %r11b, 23(%r13)
; CHECK-BASELINE-NEXT: movb %bpl, 22(%r13)
-; CHECK-BASELINE-NEXT: movb %r15b, 21(%r13)
-; CHECK-BASELINE-NEXT: movb %r14b, 20(%r13)
+; CHECK-BASELINE-NEXT: movb %r14b, 21(%r13)
+; CHECK-BASELINE-NEXT: movb %r15b, 20(%r13)
; CHECK-BASELINE-NEXT: movb %r12b, 19(%r13)
; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
; CHECK-BASELINE-NEXT: movb %al, 18(%r13)
; CHECK-SSE1-NEXT: movq %rdx, %r13
; CHECK-SSE1-NEXT: movq %rsi, %rbx
; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-SSE1-NEXT: movb 16(%rdx), %r12b
-; CHECK-SSE1-NEXT: movb 15(%rdx), %al
-; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-SSE1-NEXT: movb 15(%rdx), %r12b
; CHECK-SSE1-NEXT: movb 14(%rdx), %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 13(%rdx), %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 10(%rdx), %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 9(%rdx), %r10b
-; CHECK-SSE1-NEXT: movb 8(%rdx), %r11b
-; CHECK-SSE1-NEXT: movb 7(%rdx), %r9b
+; CHECK-SSE1-NEXT: movb 9(%rdx), %r9b
+; CHECK-SSE1-NEXT: movb 8(%rdx), %r10b
+; CHECK-SSE1-NEXT: movb 7(%rdx), %r11b
; CHECK-SSE1-NEXT: movb 6(%rdx), %r8b
; CHECK-SSE1-NEXT: movb 5(%rdx), %bpl
-; CHECK-SSE1-NEXT: movb 4(%rdx), %dil
-; CHECK-SSE1-NEXT: movb 3(%rdx), %sil
+; CHECK-SSE1-NEXT: movb 4(%rdx), %sil
+; CHECK-SSE1-NEXT: movb 3(%rdx), %dil
; CHECK-SSE1-NEXT: movb 2(%rdx), %r14b
; CHECK-SSE1-NEXT: movb (%rdx), %al
; CHECK-SSE1-NEXT: movb 1(%rdx), %r15b
; CHECK-SSE1-NEXT: xorb %r14b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 3(%rbx), %al
-; CHECK-SSE1-NEXT: xorb %sil, %al
+; CHECK-SSE1-NEXT: xorb %dil, %al
; CHECK-SSE1-NEXT: andb 3(%rcx), %al
-; CHECK-SSE1-NEXT: xorb %sil, %al
+; CHECK-SSE1-NEXT: xorb %dil, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 4(%rbx), %al
-; CHECK-SSE1-NEXT: xorb %dil, %al
+; CHECK-SSE1-NEXT: xorb %sil, %al
; CHECK-SSE1-NEXT: andb 4(%rcx), %al
-; CHECK-SSE1-NEXT: xorb %dil, %al
+; CHECK-SSE1-NEXT: xorb %sil, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 5(%rbx), %al
; CHECK-SSE1-NEXT: xorb %bpl, %al
; CHECK-SSE1-NEXT: xorb %r8b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 7(%rbx), %al
-; CHECK-SSE1-NEXT: xorb %r9b, %al
+; CHECK-SSE1-NEXT: xorb %r11b, %al
; CHECK-SSE1-NEXT: andb 7(%rcx), %al
-; CHECK-SSE1-NEXT: xorb %r9b, %al
+; CHECK-SSE1-NEXT: xorb %r11b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 8(%rbx), %al
-; CHECK-SSE1-NEXT: xorb %r11b, %al
+; CHECK-SSE1-NEXT: xorb %r10b, %al
; CHECK-SSE1-NEXT: andb 8(%rcx), %al
-; CHECK-SSE1-NEXT: xorb %r11b, %al
+; CHECK-SSE1-NEXT: xorb %r10b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 9(%rbx), %al
-; CHECK-SSE1-NEXT: xorb %r10b, %al
+; CHECK-SSE1-NEXT: xorb %r9b, %al
; CHECK-SSE1-NEXT: andb 9(%rcx), %al
-; CHECK-SSE1-NEXT: xorb %r10b, %al
+; CHECK-SSE1-NEXT: xorb %r9b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 10(%rbx), %dl
; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
; CHECK-SSE1-NEXT: andb 14(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 15(%rbx), %dl
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
-; CHECK-SSE1-NEXT: xorb %al, %dl
-; CHECK-SSE1-NEXT: andb 15(%rcx), %dl
-; CHECK-SSE1-NEXT: xorb %al, %dl
-; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 16(%rbx), %al
+; CHECK-SSE1-NEXT: movb 15(%rbx), %al
; CHECK-SSE1-NEXT: xorb %r12b, %al
-; CHECK-SSE1-NEXT: andb 16(%rcx), %al
+; CHECK-SSE1-NEXT: andb 15(%rcx), %al
; CHECK-SSE1-NEXT: xorb %r12b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; CHECK-SSE1-NEXT: movb 16(%r13), %al
+; CHECK-SSE1-NEXT: movb 16(%rbx), %dl
+; CHECK-SSE1-NEXT: xorb %al, %dl
+; CHECK-SSE1-NEXT: andb 16(%rcx), %dl
+; CHECK-SSE1-NEXT: xorb %al, %dl
+; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 17(%r13), %al
; CHECK-SSE1-NEXT: movb 17(%rbx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb 19(%rbx), %r12b
; CHECK-SSE1-NEXT: xorb %al, %r12b
; CHECK-SSE1-NEXT: andb 19(%rcx), %r12b
-; CHECK-SSE1-NEXT: movq %rcx, %rdx
; CHECK-SSE1-NEXT: xorb %al, %r12b
; CHECK-SSE1-NEXT: movb 20(%r13), %al
-; CHECK-SSE1-NEXT: movb 20(%rbx), %r14b
-; CHECK-SSE1-NEXT: xorb %al, %r14b
-; CHECK-SSE1-NEXT: andb 20(%rcx), %r14b
-; CHECK-SSE1-NEXT: xorb %al, %r14b
-; CHECK-SSE1-NEXT: movb 21(%r13), %al
-; CHECK-SSE1-NEXT: movb 21(%rbx), %r15b
+; CHECK-SSE1-NEXT: movb 20(%rbx), %r15b
; CHECK-SSE1-NEXT: xorb %al, %r15b
-; CHECK-SSE1-NEXT: andb 21(%rcx), %r15b
+; CHECK-SSE1-NEXT: andb 20(%rcx), %r15b
+; CHECK-SSE1-NEXT: movq %rcx, %rsi
; CHECK-SSE1-NEXT: xorb %al, %r15b
+; CHECK-SSE1-NEXT: movb 21(%r13), %al
+; CHECK-SSE1-NEXT: movb 21(%rbx), %r14b
+; CHECK-SSE1-NEXT: xorb %al, %r14b
+; CHECK-SSE1-NEXT: andb 21(%rcx), %r14b
+; CHECK-SSE1-NEXT: xorb %al, %r14b
; CHECK-SSE1-NEXT: movb 22(%r13), %al
; CHECK-SSE1-NEXT: movb 22(%rbx), %bpl
; CHECK-SSE1-NEXT: xorb %al, %bpl
; CHECK-SSE1-NEXT: andb 27(%rcx), %dil
; CHECK-SSE1-NEXT: xorb %al, %dil
; CHECK-SSE1-NEXT: movb 28(%r13), %al
-; CHECK-SSE1-NEXT: movb 28(%rbx), %sil
-; CHECK-SSE1-NEXT: xorb %al, %sil
-; CHECK-SSE1-NEXT: andb 28(%rcx), %sil
-; CHECK-SSE1-NEXT: xorb %al, %sil
+; CHECK-SSE1-NEXT: movb 28(%rbx), %dl
+; CHECK-SSE1-NEXT: xorb %al, %dl
+; CHECK-SSE1-NEXT: andb 28(%rcx), %dl
+; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb 29(%r13), %al
; CHECK-SSE1-NEXT: movb 29(%rbx), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
-; CHECK-SSE1-NEXT: andb 29(%rdx), %cl
+; CHECK-SSE1-NEXT: andb 29(%rsi), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb 30(%r13), %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-SSE1-NEXT: movb 30(%rbx), %al
; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
-; CHECK-SSE1-NEXT: andb 30(%rdx), %al
+; CHECK-SSE1-NEXT: andb 30(%rsi), %al
; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb 31(%r13), %r13b
; CHECK-SSE1-NEXT: movb 31(%rbx), %bl
; CHECK-SSE1-NEXT: xorb %r13b, %bl
-; CHECK-SSE1-NEXT: andb 31(%rdx), %bl
+; CHECK-SSE1-NEXT: andb 31(%rsi), %bl
; CHECK-SSE1-NEXT: xorb %r13b, %bl
; CHECK-SSE1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
; CHECK-SSE1-NEXT: movb %bl, 31(%r13)
; CHECK-SSE1-NEXT: movb %al, 30(%r13)
; CHECK-SSE1-NEXT: movb %cl, 29(%r13)
-; CHECK-SSE1-NEXT: movb %sil, 28(%r13)
+; CHECK-SSE1-NEXT: movb %dl, 28(%r13)
; CHECK-SSE1-NEXT: movb %dil, 27(%r13)
; CHECK-SSE1-NEXT: movb %r8b, 26(%r13)
; CHECK-SSE1-NEXT: movb %r9b, 25(%r13)
; CHECK-SSE1-NEXT: movb %r10b, 24(%r13)
; CHECK-SSE1-NEXT: movb %r11b, 23(%r13)
; CHECK-SSE1-NEXT: movb %bpl, 22(%r13)
-; CHECK-SSE1-NEXT: movb %r15b, 21(%r13)
-; CHECK-SSE1-NEXT: movb %r14b, 20(%r13)
+; CHECK-SSE1-NEXT: movb %r14b, 21(%r13)
+; CHECK-SSE1-NEXT: movb %r15b, 20(%r13)
; CHECK-SSE1-NEXT: movb %r12b, 19(%r13)
; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
; CHECK-SSE1-NEXT: movb %al, 18(%r13)