case X86::CVTSI2SD64rm:
case X86::CVTSD2SSrr:
case X86::CVTSD2SSrm:
- case X86::Int_CVTSD2SSrr:
- case X86::Int_CVTSD2SSrm:
case X86::CVTSS2SDrr:
case X86::CVTSS2SDrm:
- case X86::Int_CVTSS2SDrr:
- case X86::Int_CVTSS2SDrm:
case X86::MOVHPDrm:
case X86::MOVHPSrm:
case X86::MOVLPDrm:
case X86::RCPSSm_Int:
case X86::ROUNDSDr:
case X86::ROUNDSDm:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSDm_Int:
case X86::ROUNDSSr:
case X86::ROUNDSSm:
- case X86::ROUNDSSr_Int:
- case X86::ROUNDSSm_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSm:
case X86::RSQRTSSr_Int:
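The opcodes removed above are the intrinsic (`*_Int`) forms of CVTSD2SS, CVTSS2SD, ROUNDSS and ROUNDSD; taking them out of this list evidently stops it from blocking load folding for those instructions, which is what the test diffs below verify. A minimal IR sketch of the pattern being folded (the function name and pointer argument are hypothetical, not from the patch; the intrinsic signature matches the declarations used in the tests):

; before: movaps (%rdi), %xmm1 ; cvtsd2ss %xmm1, %xmm0
; after:  cvtsd2ss (%rdi), %xmm0
define <4 x float> @fold_cvtsd2ss_int(<4 x float> %a0, <2 x double>* %p) {
  %v = load <2 x double>, <2 x double>* %p
  %r = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %v)
  ret <4 x float> %r
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone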
; X32-LABEL: test_mm_cvtsd_ss_load:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movaps (%eax), %xmm1
-; X32-NEXT: cvtsd2ss %xmm1, %xmm0
+; X32-NEXT: cvtsd2ss (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsd_ss_load:
; X64: # BB#0:
-; X64-NEXT: movaps (%rdi), %xmm1
-; X64-NEXT: cvtsd2ss %xmm1, %xmm0
+; X64-NEXT: cvtsd2ss (%rdi), %xmm0
; X64-NEXT: retq
%a1 = load <2 x double>, <2 x double>* %p1
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
; SSE: ## BB#0:
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT: movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1]
+; SSE-NEXT: cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00]
; SSE-NEXT: retl ## encoding: [0xc3]
;
; VCHECK-LABEL: test_x86_sse2_cvtsd2ss_load:
; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; SSE: ## BB#0:
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT: movaps (%eax), %xmm1 ## encoding: [0x0f,0x28,0x08]
-; SSE-NEXT: cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
+; SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
; SSE-NEXT: retl ## encoding: [0xc3]
;
; VCHECK-LABEL: test_x86_sse2_cvtss2sd_load:
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>* %a1) {
+; SSE41-LABEL: test_x86_sse41_round_sd_load:
+; SSE41: ## BB#0:
+; SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; SSE41-NEXT: roundsd $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x00,0x07]
+; SSE41-NEXT: retl ## encoding: [0xc3]
+;
+; VCHECK-LABEL: test_x86_sse41_round_sd_load:
+; VCHECK: ## BB#0:
+; VCHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; VCHECK-NEXT: vroundsd $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
+; VCHECK-NEXT: retl ## encoding: [0xc3]
+ %a1b = load <2 x double>, <2 x double>* %a1
+ %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+
+
define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE41-LABEL: test_x86_sse41_round_ss:
; SSE41: ## BB#0:
define <2 x double> @load_fold_cvtss2sd_int(<4 x float> *%a) {
; CHECK-LABEL: load_fold_cvtss2sd_int:
; CHECK: ## BB#0:
-; CHECK-NEXT: movaps (%rdi), %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtss2sd %xmm1, %xmm0
+; CHECK-NEXT: cvtss2sd (%rdi), %xmm0
; CHECK-NEXT: retq
%ld = load <4 x float>, <4 x float> *%a
%x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %ld)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movaps %xmm0, (%esp) ## 16-byte Spill
; X32-NEXT: calll _f
-; X32-NEXT: movaps (%esp), %xmm1 ## 16-byte Reload
-; X32-NEXT: roundss $4, %xmm1, %xmm0
+; X32-NEXT: roundss $4, (%esp), %xmm0 ## 16-byte Folded Reload
; X32-NEXT: addl $28, %esp
; X32-NEXT: retl
;
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill
; X64-NEXT: callq _f
-; X64-NEXT: movaps (%rsp), %xmm1 ## 16-byte Reload
-; X64-NEXT: roundss $4, %xmm1, %xmm0
+; X64-NEXT: roundss $4, (%rsp), %xmm0 ## 16-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
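The last hunk shows the same folding applied to a spill slot: a value kept live across a call is spilled with movaps, and instead of being reloaded into a scratch register the reload is folded straight into roundss (the "16-byte Folded Reload" annotation). A reduced sketch of IR that should exercise this path (function names are hypothetical, not from the patch; llvm.x86.sse41.round.ss is assumed to mirror the round.sd declaration above):

declare void @g()
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

define <4 x float> @fold_reload_roundss(<4 x float> %a0, <4 x float> %a1) {
  ; both vector arguments are live across the call and get spilled; the reload
  ; of the rounded operand should fold into roundss as a folded reload rather
  ; than going through a register first.
  call void @g()
  %r = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 4)
  ret <4 x float> %r
}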