define double @single_to_double_rm(float* %x) {
; SSE-LABEL: single_to_double_rm:
; SSE: # BB#0: # %entry
-; SSE-NEXT: cvtss2sd (%rdi), %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: cvtss2sd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: single_to_double_rm:
ret double %conv
}
+define double @single_to_double_rm_optsize(float* %x) optsize { ; optsize test: load a float through %x and extend it to double
+; SSE-LABEL: single_to_double_rm_optsize:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: cvtss2sd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: single_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry: ; SSE checks expect the load folded into cvtss2sd; AVX checks expect a separate vmovss first
+ %0 = load float, float* %x, align 4 ; memory operand of the conversion under test
+ %conv = fpext float %0 to double ; NOTE(review): the non-optsize SSE checks in this diff moved to a separate movss; presumably the fold is kept only under optsize - confirm
+ ret double %conv
+}
+
define float @double_to_single_rm(double* %x) {
; SSE-LABEL: double_to_single_rm:
; SSE: # BB#0: # %entry
-; SSE-NEXT: cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: cvtsd2ss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: double_to_single_rm:
%conv = fptrunc double %0 to float
ret float %conv
}
+
+define float @double_to_single_rm_optsize(double* %x) optsize { ; optsize test: load a double through %x and truncate it to float
+; SSE-LABEL: double_to_single_rm_optsize:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: double_to_single_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry: ; SSE checks expect the load folded into cvtsd2ss; AVX checks expect a separate vmovsd first
+ %0 = load double, double* %x, align 8 ; memory operand of the conversion under test
+ %conv = fptrunc double %0 to float ; NOTE(review): the non-optsize SSE checks in this diff moved to a separate movsd; presumably the fold is kept only under optsize - confirm
+ ret float %conv
+}
define double @long_to_double_rm(i64* %a) {
; SSE2-LABEL: long_to_double_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_double_rm:
ret double %1
}
+define double @long_to_double_rm_optsize(i64* %a) optsize { ; optsize test: load an i64 through %a and convert it (signed) to double
+; SSE2-LABEL: long_to_double_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: long_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry: ; both the SSE2 and AVX checks expect the load folded into the conversion instruction
+ %0 = load i64, i64* %a ; memory operand of the conversion under test
+ %1 = sitofp i64 %0 to double ; NOTE(review): the non-optsize SSE2 checks in this diff moved to movq + register-form cvtsi2sdq; presumably the fold is kept only under optsize - confirm
+ ret double %1
+}
+
define float @long_to_float_rr(i64 %a) {
; SSE2-LABEL: long_to_float_rr:
; SSE2: # BB#0: # %entry
define float @long_to_float_rm(i64* %a) {
; SSE2-LABEL: long_to_float_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT: movq (%rdi), %rax
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_float_rm:
%1 = sitofp i64 %0 to float
ret float %1
}
+
+define float @long_to_float_rm_optsize(i64* %a) optsize { ; optsize test: load an i64 through %a and convert it (signed) to float
+; SSE2-LABEL: long_to_float_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: long_to_float_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+entry: ; both the SSE2 and AVX checks expect the load folded into the conversion instruction
+ %0 = load i64, i64* %a ; memory operand of the conversion under test
+ %1 = sitofp i64 %0 to float ; NOTE(review): the non-optsize SSE2 checks in this diff moved to movq + register-form cvtsi2ssq; presumably the fold is kept only under optsize - confirm
+ ret float %1
+}
define double @int_to_double_rm(i32* %a) {
; SSE2-LABEL: int_to_double_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT: movl (%rdi), %eax
+; SSE2-NEXT: cvtsi2sdl %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: int_to_double_rm:
ret double %1
}
+define double @int_to_double_rm_optsize(i32* %a) optsize { ; optsize test: load an i32 through %a and convert it (signed) to double
+; SSE2-LABEL: int_to_double_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: int_to_double_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; SSE2_X86-LABEL: int_to_double_rm_optsize:
+; SSE2_X86: # BB#0: # %entry
+; SSE2_X86-NEXT: pushl %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT: .cfi_offset %ebp, -8
+; SSE2_X86-NEXT: movl %esp, %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
+; SSE2_X86-NEXT: andl $-8, %esp
+; SSE2_X86-NEXT: subl $8, %esp
+; SSE2_X86-NEXT: movl 8(%ebp), %eax
+; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0
+; SSE2_X86-NEXT: movsd %xmm0, (%esp)
+; SSE2_X86-NEXT: fldl (%esp)
+; SSE2_X86-NEXT: movl %ebp, %esp
+; SSE2_X86-NEXT: popl %ebp
+; SSE2_X86-NEXT: .cfi_def_cfa %esp, 4
+; SSE2_X86-NEXT: retl
+;
+; AVX_X86-LABEL: int_to_double_rm_optsize:
+; AVX_X86: # BB#0: # %entry
+; AVX_X86-NEXT: pushl %ebp
+; AVX_X86-NEXT: .cfi_def_cfa_offset 8
+; AVX_X86-NEXT: .cfi_offset %ebp, -8
+; AVX_X86-NEXT: movl %esp, %ebp
+; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT: andl $-8, %esp
+; AVX_X86-NEXT: subl $8, %esp
+; AVX_X86-NEXT: movl 8(%ebp), %eax
+; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT: fldl (%esp)
+; AVX_X86-NEXT: movl %ebp, %esp
+; AVX_X86-NEXT: popl %ebp
+; AVX_X86-NEXT: .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT: retl
+entry: ; all four check prefixes expect the load folded into the conversion; the *_X86 checks also spill the result to the stack and reload it with fldl
+ %0 = load i32, i32* %a ; memory operand of the conversion under test
+ %1 = sitofp i32 %0 to double ; NOTE(review): the non-optsize SSE2 checks in this diff moved to movl + register-form cvtsi2sdl; presumably the fold is kept only under optsize - confirm
+ ret double %1
+}
+
define float @int_to_float_rr(i32 %a) {
; SSE2-LABEL: int_to_float_rr:
; SSE2: # BB#0: # %entry
define float @int_to_float_rm(i32* %a) {
; SSE2-LABEL: int_to_float_rm:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT: movl (%rdi), %eax
+; SSE2-NEXT: cvtsi2ssl %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: int_to_float_rm:
%1 = sitofp i32 %0 to float
ret float %1
}
+
+define float @int_to_float_rm_optsize(i32* %a) optsize { ; optsize test: load an i32 through %a and convert it (signed) to float
+; SSE2-LABEL: int_to_float_rm_optsize:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: int_to_float_rm_optsize:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; SSE2_X86-LABEL: int_to_float_rm_optsize:
+; SSE2_X86: # BB#0: # %entry
+; SSE2_X86-NEXT: pushl %eax
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2_X86-NEXT: cvtsi2ssl (%eax), %xmm0
+; SSE2_X86-NEXT: movss %xmm0, (%esp)
+; SSE2_X86-NEXT: flds (%esp)
+; SSE2_X86-NEXT: popl %eax
+; SSE2_X86-NEXT: .cfi_def_cfa_offset 4
+; SSE2_X86-NEXT: retl
+;
+; AVX_X86-LABEL: int_to_float_rm_optsize:
+; AVX_X86: # BB#0: # %entry
+; AVX_X86-NEXT: pushl %eax
+; AVX_X86-NEXT: .cfi_def_cfa_offset 8
+; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT: vmovss %xmm0, (%esp)
+; AVX_X86-NEXT: flds (%esp)
+; AVX_X86-NEXT: popl %eax
+; AVX_X86-NEXT: .cfi_def_cfa_offset 4
+; AVX_X86-NEXT: retl
+entry: ; all four check prefixes expect the load folded into the conversion; the *_X86 checks also spill the result to the stack and reload it with flds
+ %0 = load i32, i32* %a ; memory operand of the conversion under test
+ %1 = sitofp i32 %0 to float ; NOTE(review): the non-optsize SSE2 checks in this diff moved to movl + register-form cvtsi2ssl; presumably the fold is kept only under optsize - confirm
+ ret float %1
+}