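The Sandy Bridge scheduler model was forcing the latency of plain store instructions to 5 cycles via explicit InstRW overrides, but every other scheduler model has them as 1 cycle. This patch removes those overrides so the stores fall back to the default 1-cycle store latency. It probably does not catch every affected instruction, but it covers a large portion of them.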
llvm-svn: 329339
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
- "MOV(16|32|64)mr",
- "MOV8mi",
- "MOV8mr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP32m",
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
- "MOV(16|32|64)mr",
- "MOV8mi",
- "MOV8mr",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP32m",
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup33], (instregex "MOV(8|16|32|64)mr",
- "MOVNTI_64mr",
- "MOVNTImr",
- "PUSH64i8",
- "PUSH(16|32|64)r",
- "VEXTRACTF128mr",
- "(V?)MOVAPD(Y?)mr",
- "(V?)MOVAPS(Y?)mr",
- "(V?)MOVDQA(Y?)mr",
- "(V?)MOVDQU(Y?)mr",
- "(V?)MOVHPDmr",
- "(V?)MOVHPSmr",
- "(V?)MOVLPDmr",
- "(V?)MOVLPSmr",
- "(V?)MOVNTDQ(Y?)mr",
- "(V?)MOVNTPD(Y?)mr",
- "(V?)MOVNTPS(Y?)mr",
- "(V?)MOVPDI2DImr",
- "(V?)MOVPQI2QImr",
- "(V?)MOVPQIto64mr",
- "(V?)MOVSDmr",
- "(V?)MOVSSmr",
- "(V?)MOVUPD(Y?)mr",
- "(V?)MOVUPS(Y?)mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8",
+ "PUSH(16|32|64)r")>;
def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> {
let Latency = 7;
"VMASKMOVPS(Y?)mr")>;
def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
- let Latency = 5;
+ let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi",
- "STOSB",
+def: InstRW<[SBWriteResGroup40], (instregex "STOSB",
"STOSL",
"STOSQ",
"STOSW")>;
def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG(8|16|32|64)rr")>;
def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
- let Latency = 5;
+ let Latency = 3;
let NumMicroOps = 4;
let ResourceCycles = [1,1,2];
}
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
- "MOV(8|16|32|64)mr",
- "MOV8mi",
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP32m",
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
- "MOV(16|32|64)mr",
- "MOV8mi",
- "MOV8mr",
"MOVAPDmr",
"MOVAPSmr",
"MOVDQAmr",
; GENERIC-LABEL: test_extractf128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_extractf128:
; SANDY: # %bb.0:
; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: vzeroupper # sched: [100:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movapd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movapd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movaps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movaps:
; GENERIC-LABEL: test_movntdq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
; SANDY-LABEL: test_movntdq:
; SANDY: # %bb.0:
; SANDY-NEXT: #APP
-; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: vzeroupper # sched: [100:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: test_movntpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movntpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movntpd:
; GENERIC-LABEL: test_movntps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movntps:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movntps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movupd:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
-; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
+; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movupd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movups:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
-; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
+; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movups:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32tof64_loadstore:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f64tof32_loadstore:
define void @mov_test5(float %x, float* %y) {
; GENERIC-LABEL: mov_test5:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test5:
define void @mov_test6(double %x, double* %y) {
; GENERIC-LABEL: mov_test6:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mov_test6:
define i8 @conv1(<8 x i1>* %R) {
; GENERIC-LABEL: conv1:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movb $-1, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; GENERIC-NEXT: movb $-1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [5:1.00]
+; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: jmp f2 # TAILCALL
;
; SKX-LABEL: f1:
; GENERIC-LABEL: store_i16_i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: movb %dil, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: store_i16_i1:
; GENERIC-LABEL: store_i8_i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: movb %dil, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: store_i8_i1:
; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 32
; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00]
+; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f32
; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 32
; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00]
+; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f64
; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: paddd (%ecx), %xmm0
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movdqa %xmm0, (%ecx)
-; CHECK-NEXT: movl (%ecx), %esi
-; CHECK-NEXT: movl 4(%ecx), %edi
-; CHECK-NEXT: shll $4, %edx
-; CHECK-NEXT: movl 8(%ecx), %ebx
-; CHECK-NEXT: movl 12(%ecx), %ecx
-; CHECK-NEXT: movl %esi, 12(%eax,%edx)
-; CHECK-NEXT: movl %edi, (%eax,%edx)
-; CHECK-NEXT: movl %ebx, 8(%eax,%edx)
-; CHECK-NEXT: movl %ecx, 4(%eax,%edx)
+; CHECK-NEXT: paddd (%edx), %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%edx)
+; CHECK-NEXT: movl (%edx), %esi
+; CHECK-NEXT: movl 4(%edx), %edi
+; CHECK-NEXT: shll $4, %ecx
+; CHECK-NEXT: movl 8(%edx), %ebx
+; CHECK-NEXT: movl 12(%edx), %edx
+; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
+; CHECK-NEXT: movl %edi, (%eax,%ecx)
+; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
+; CHECK-NEXT: movl %edx, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
-; CHECK-NEXT: ## -- End function
entry:
%0 = bitcast i32* %y to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 16
; CHECK-NEXT: andq %rdi, %rcx
; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
-; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: jmp foo # TAILCALL
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; CHECK-NEXT: andq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq %rax, (%rsp)
+; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: andq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rcx, (%rsp)
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: movaps {{.*}}(%rip), %xmm1
; CHECK-NEXT: callq __lttf2
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
-; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq __gttf2
; CHECK-NEXT: movl %eax, %ebp
-; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: callq __subtf3
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: movaps (%rsp), %xmm2 # 16-byte Reload
; CHECK-NEXT: .LBB10_3: # %cleanup
; CHECK-NEXT: movaps {{.*}}(%rip), %xmm1
-; CHECK-NEXT: andps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: andps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: movaps %xmm2, (%rbx)
; CHECK-NEXT: movaps %xmm0, 16(%rbx)
; SSE1-LABEL: t1:
; SSE1: movaps _.str, %xmm0
-; SSE1: movb $0, 24(%esp)
; SSE1: movaps %xmm0
+; SSE1: movb $0, 24(%esp)
; SSE1: movl $0, 20(%esp)
; SSE1: movl $0, 16(%esp)
;
; TOPDOWN-LABEL: %for.body
; TOPDOWN: movl %{{.*}}, (
-; TOPDOWN-NOT: imull {{[0-9]*}}(
+; TOPDOWN: imull {{[0-9]*}}(
; TOPDOWN: movl %{{.*}}, 4(
-; TOPDOWN-NOT: imull {{[0-9]*}}(
+; TOPDOWN: imull {{[0-9]*}}(
; TOPDOWN: movl %{{.*}}, 8(
; TOPDOWN: movl %{{.*}}, 12(
; TOPDOWN-LABEL: %for.end
; GENERIC-NEXT: paddd %mm2, %mm0 # sched: [3:1.00]
; GENERIC-NEXT: movd %mm2, %ecx # sched: [1:0.33]
; GENERIC-NEXT: movd %mm0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movd:
; SANDY-NEXT: paddd %mm2, %mm0 # sched: [3:1.00]
; SANDY-NEXT: movd %mm2, %ecx # sched: [1:0.33]
; SANDY-NEXT: movd %mm0, %eax # sched: [1:0.33]
-; SANDY-NEXT: movl %ecx, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movd:
; CHECK-NEXT: movl struct_obj_3+{{.*}}(%rip), %eax
; CHECK-NEXT: movsbl {{.*}}(%rip), %ecx
; CHECK-NEXT: movzbl {{.*}}(%rip), %edx
-; CHECK-NEXT: movzbl {{.*}}(%rip), %esi
; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: leal (%rax,%rax), %edi
-; CHECK-NEXT: subl %ecx, %edi
-; CHECK-NEXT: subl %edx, %edi
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: notl %ecx
-; CHECK-NEXT: movzbl %cl, %ecx
-; CHECK-NEXT: movw %cx, struct_obj_12+{{.*}}(%rip)
+; CHECK-NEXT: leal (%rax,%rax), %esi
+; CHECK-NEXT: subl %ecx, %esi
+; CHECK-NEXT: subl %edx, %esi
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: cmovel %eax, %ecx
+; CHECK-NEXT: movzbl {{.*}}(%rip), %edx
; CHECK-NEXT: andl struct_obj_8+{{.*}}(%rip), %ecx
; CHECK-NEXT: andl $1, %ecx
; CHECK-NEXT: negl %ecx
-; CHECK-NEXT: andl %esi, %ecx
+; CHECK-NEXT: andl %edx, %ecx
; CHECK-NEXT: negl %ecx
; CHECK-NEXT: andl %eax, %ecx
; CHECK-NEXT: negl %ecx
-; CHECK-NEXT: testl %ecx, %edi
+; CHECK-NEXT: testl %ecx, %esi
+; CHECK-NEXT: notl %esi
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: movw %ax, struct_obj_12+{{.*}}(%rip)
; CHECK-NEXT: setne {{.*}}(%rip)
; CHECK-NEXT: retq
entry:
; SSE2-SCHEDULE-NEXT: movq %rsp, %rbp
; SSE2-SCHEDULE-NEXT: .cfi_def_cfa_register %rbp
; SSE2-SCHEDULE-NEXT: fnstcw -4(%rbp)
-; SSE2-SCHEDULE-NEXT: fldt 16(%rbp)
; SSE2-SCHEDULE-NEXT: movzwl -4(%rbp), %eax
; SSE2-SCHEDULE-NEXT: movw $3199, -4(%rbp) ## imm = 0xC7F
; SSE2-SCHEDULE-NEXT: fldcw -4(%rbp)
+; SSE2-SCHEDULE-NEXT: fldt 16(%rbp)
; SSE2-SCHEDULE-NEXT: movw %ax, -4(%rbp)
; SSE2-SCHEDULE-NEXT: fistl -8(%rbp)
; SSE2-SCHEDULE-NEXT: fldcw -4(%rbp)
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -64(%rbp)
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -32(%rbp)
; SSE2-SCHEDULE-NEXT: fsubl -32(%rbp)
-; SSE2-SCHEDULE-NEXT: flds {{.*}}(%rip)
; SSE2-SCHEDULE-NEXT: fnstcw -2(%rbp)
-; SSE2-SCHEDULE-NEXT: fmul %st(0), %st(1)
+; SSE2-SCHEDULE-NEXT: flds {{.*}}(%rip)
; SSE2-SCHEDULE-NEXT: movzwl -2(%rbp), %eax
; SSE2-SCHEDULE-NEXT: movw $3199, -2(%rbp) ## imm = 0xC7F
; SSE2-SCHEDULE-NEXT: fldcw -2(%rbp)
+; SSE2-SCHEDULE-NEXT: fmul %st(0), %st(1)
; SSE2-SCHEDULE-NEXT: movw %ax, -2(%rbp)
; SSE2-SCHEDULE-NEXT: fxch %st(1)
; SSE2-SCHEDULE-NEXT: fistl -12(%rbp)
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [5:1.00]
+; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_mem_b:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [5:1.00]
+; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_mem_b_optsize:
; GENERIC-LABEL: test_movnti:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: movntil %edi, (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: movntiq %rdx, (%rcx) # sched: [5:1.00]
+; GENERIC-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
+; GENERIC-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movnti:
; SANDY: # %bb.0:
; SANDY-NEXT: #APP
-; SANDY-NEXT: movntil %edi, (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: movntiq %rdx, (%rcx) # sched: [5:1.00]
+; SANDY-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
+; SANDY-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: retq # sched: [1:1.00]
;
; GENERIC-NEXT: setge %dil # sched: [1:0.50]
; GENERIC-NEXT: setle %dil # sched: [1:0.50]
; GENERIC-NEXT: setg %dil # sched: [1:0.50]
-; GENERIC-NEXT: seto (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setno (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setb (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setae (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: sete (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setne (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setbe (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: seta (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: sets (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setns (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setp (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setnp (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setl (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setge (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setle (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: setg (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: seto (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setno (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setb (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setae (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: sete (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setne (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setbe (%rsi) # sched: [3:1.00]
+; GENERIC-NEXT: seta (%rsi) # sched: [3:1.00]
+; GENERIC-NEXT: sets (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setns (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setp (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setnp (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setl (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setge (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setle (%rsi) # sched: [2:1.00]
+; GENERIC-NEXT: setg (%rsi) # sched: [2:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-NEXT: setge %dil # sched: [1:0.50]
; SANDY-NEXT: setle %dil # sched: [1:0.50]
; SANDY-NEXT: setg %dil # sched: [1:0.50]
-; SANDY-NEXT: seto (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setno (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setb (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setae (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: sete (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setne (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setbe (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: seta (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: sets (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setns (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setp (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setnp (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setl (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setge (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setle (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: setg (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: seto (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setno (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setb (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setae (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: sete (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setne (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setbe (%rsi) # sched: [3:1.00]
+; SANDY-NEXT: seta (%rsi) # sched: [3:1.00]
+; SANDY-NEXT: sets (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setns (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setp (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setnp (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setl (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setge (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setle (%rsi) # sched: [2:1.00]
+; SANDY-NEXT: setg (%rsi) # sched: [2:1.00]
; SANDY-NEXT: #NO_APP
; SANDY-NEXT: retq # sched: [1:1.00]
;
define void @test_ldmxcsr(i32 %a0) {
; GENERIC-LABEL: test_ldmxcsr:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
;
; SANDY-SSE-LABEL: test_ldmxcsr:
; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SANDY-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_ldmxcsr:
; SANDY: # %bb.0:
-; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
+; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; GENERIC: # %bb.0:
; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movaps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movaps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movaps:
; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movhps:
; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movhps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movlps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movlps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movlps:
define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_movntps:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movntps:
;
; SANDY-SSE-LABEL: test_movntps:
; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movntps:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movntps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movss_mem:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movss_mem:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movss_mem:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movups:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movups:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movups:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movapd %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movapd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movapd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movapd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: movdqa %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movdqa:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movdqa:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movdqa:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: movdqu %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movdqu:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movdqu:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movdqu:
; GENERIC-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; GENERIC-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
; GENERIC-NEXT: movd %xmm2, %eax # sched: [2:1.00]
-; GENERIC-NEXT: movd %xmm1, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movd:
; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
-; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movd:
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movd:
; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
; GENERIC-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
; GENERIC-NEXT: movq %xmm2, %rax # sched: [2:1.00]
-; GENERIC-NEXT: movq %xmm1, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movd_64:
; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
-; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movd_64:
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movd_64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movhpd %xmm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movhpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movhpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movhpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movlpd %xmm1, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movlpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movlpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movlpd:
; GENERIC-LABEL: test_movntdqa:
; GENERIC: # %bb.0:
; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: movntdq %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movntdqa:
; SANDY-SSE-LABEL: test_movntdqa:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movntdqa:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movntdqa:
; GENERIC-LABEL: test_movntpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movntpd %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movntpd:
; SANDY-SSE-LABEL: test_movntpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movntpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movntpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: movq %xmm0, (%rdi) # sched: [5:1.00]
+; GENERIC-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movq_mem:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movq_mem:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movq_mem:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
; GENERIC-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movsd %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movsd_mem:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movsd_mem:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movsd_mem:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movupd %xmm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movupd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movupd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movupd: