setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
- // Long double always uses X87, except f128 in MMX.
+ // Long double always uses X87, except f128 in SSE.
if (UseX87) {
- if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
+ if (Subtarget.is64Bit() && Subtarget.hasSSE1()) {
addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
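For context, the effect of keying this check on SSE1 rather than MMX is that 64-bit targets with SSE keep fp128 values in the XMM register classes (VR128/VR128X), while fp128 arithmetic itself still goes through libcalls. A rough, illustrative sketch only (not part of this patch; exact output depends on the subtarget): a plain fp128 copy is expected to use 128-bit XMM moves rather than a pair of GPR moves.

; Illustrative IR, assuming an x86_64 target with SSE enabled: the aligned
; load/store below is expected to lower to 128-bit XMM moves (e.g. movaps),
; whereas with -sse it falls back to two 64-bit GPR loads and stores.
define void @copy_fp128(fp128* %dst, fp128* %src) {
  %v = load fp128, fp128* %src, align 16
  store fp128 %v, fp128* %dst, align 16
  ret void
}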
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=-sse | FileCheck %s --check-prefix=X64-NOSSE
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE
+
+; Note: This test is testing that the lowering for atomics matches what we
+; currently emit for non-atomics + the atomic restriction. The presence of
+; any particular lowering detail in these tests should not be read as requiring
+; that detail for correctness unless it's related to the atomicity itself.
+; (Specifically, there were reviewer questions about the lowering for halves
+; and their calling convention which remain unresolved.)
+
+define void @store_fp128(fp128* %fptr, fp128 %v) {
+; X64-NOSSE-LABEL: store_fp128:
+; X64-NOSSE: # %bb.0:
+; X64-NOSSE-NEXT: pushq %rax
+; X64-NOSSE-NEXT: .cfi_def_cfa_offset 16
+; X64-NOSSE-NEXT: callq __sync_lock_test_and_set_16
+; X64-NOSSE-NEXT: popq %rax
+; X64-NOSSE-NEXT: .cfi_def_cfa_offset 8
+; X64-NOSSE-NEXT: retq
+;
+; X64-SSE-LABEL: store_fp128:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: subq $24, %rsp
+; X64-SSE-NEXT: .cfi_def_cfa_offset 32
+; X64-SSE-NEXT: movaps %xmm0, (%rsp)
+; X64-SSE-NEXT: movq (%rsp), %rsi
+; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; X64-SSE-NEXT: callq __sync_lock_test_and_set_16
+; X64-SSE-NEXT: addq $24, %rsp
+; X64-SSE-NEXT: .cfi_def_cfa_offset 8
+; X64-SSE-NEXT: retq
+ store atomic fp128 %v, fp128* %fptr unordered, align 16
+ ret void
+}
; X86-NOSSE-NEXT: .cfi_adjust_cfa_offset -56
; X86-NOSSE-NEXT: retl
;
-; X64-LABEL: store_fp128:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: callq __sync_lock_test_and_set_16
-; X64-NEXT: popq %rax
-; X64-NEXT: .cfi_def_cfa_offset 8
-; X64-NEXT: retq
+; X64-SSE-LABEL: store_fp128:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: subq $24, %rsp
+; X64-SSE-NEXT: .cfi_def_cfa_offset 32
+; X64-SSE-NEXT: movaps %xmm0, (%rsp)
+; X64-SSE-NEXT: movq (%rsp), %rsi
+; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; X64-SSE-NEXT: callq __sync_lock_test_and_set_16
+; X64-SSE-NEXT: addq $24, %rsp
+; X64-SSE-NEXT: .cfi_def_cfa_offset 8
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: store_fp128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: subq $24, %rsp
+; X64-AVX-NEXT: .cfi_def_cfa_offset 32
+; X64-AVX-NEXT: vmovaps %xmm0, (%rsp)
+; X64-AVX-NEXT: movq (%rsp), %rsi
+; X64-AVX-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; X64-AVX-NEXT: callq __sync_lock_test_and_set_16
+; X64-AVX-NEXT: addq $24, %rsp
+; X64-AVX-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX-NEXT: retq
store atomic fp128 %v, fp128* %fptr unordered, align 16
ret void
}
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl $4
;
-; X64-LABEL: load_fp128:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: xorl %r8d, %r8d
-; X64-NEXT: callq __sync_val_compare_and_swap_16
-; X64-NEXT: popq %rcx
-; X64-NEXT: .cfi_def_cfa_offset 8
-; X64-NEXT: retq
+; X64-SSE-LABEL: load_fp128:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: subq $24, %rsp
+; X64-SSE-NEXT: .cfi_def_cfa_offset 32
+; X64-SSE-NEXT: xorl %esi, %esi
+; X64-SSE-NEXT: xorl %edx, %edx
+; X64-SSE-NEXT: xorl %ecx, %ecx
+; X64-SSE-NEXT: xorl %r8d, %r8d
+; X64-SSE-NEXT: callq __sync_val_compare_and_swap_16
+; X64-SSE-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movq %rax, (%rsp)
+; X64-SSE-NEXT: movaps (%rsp), %xmm0
+; X64-SSE-NEXT: addq $24, %rsp
+; X64-SSE-NEXT: .cfi_def_cfa_offset 8
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: load_fp128:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: subq $24, %rsp
+; X64-AVX-NEXT: .cfi_def_cfa_offset 32
+; X64-AVX-NEXT: xorl %esi, %esi
+; X64-AVX-NEXT: xorl %edx, %edx
+; X64-AVX-NEXT: xorl %ecx, %ecx
+; X64-AVX-NEXT: xorl %r8d, %r8d
+; X64-AVX-NEXT: callq __sync_val_compare_and_swap_16
+; X64-AVX-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; X64-AVX-NEXT: movq %rax, (%rsp)
+; X64-AVX-NEXT: vmovaps (%rsp), %xmm0
+; X64-AVX-NEXT: addq $24, %rsp
+; X64-AVX-NEXT: .cfi_def_cfa_offset 8
+; X64-AVX-NEXT: retq
%v = load atomic fp128, fp128* %fptr unordered, align 16
ret fp128 %v
}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 -mattr=-sse | FileCheck %s --check-prefix=NOSSE
; FIXME: This test has a fatal error in 32-bit mode
; CHECK-LABEL: atomic_fetch_swapf128:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movq %rsi, %rcx
-; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi
-; CHECK-NEXT: movq (%rsi), %rax
-; CHECK-NEXT: movq 8(%rsi), %rdx
+; CHECK-NEXT: movaps (%rsi), %xmm1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: lock cmpxchg16b (%rsi)
+; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT: jne LBB0_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
+;
+; NOSSE-LABEL: atomic_fetch_swapf128:
+; NOSSE: ## %bb.0:
+; NOSSE-NEXT: pushq %rbx
+; NOSSE-NEXT: movq %rsi, %rcx
+; NOSSE-NEXT: movq %rdi, %rbx
+; NOSSE-NEXT: movq _fsc128@{{.*}}(%rip), %rsi
+; NOSSE-NEXT: movq (%rsi), %rax
+; NOSSE-NEXT: movq 8(%rsi), %rdx
+; NOSSE-NEXT: .p2align 4, 0x90
+; NOSSE-NEXT: LBB0_1: ## %atomicrmw.start
+; NOSSE-NEXT: ## =>This Inner Loop Header: Depth=1
+; NOSSE-NEXT: lock cmpxchg16b (%rsi)
+; NOSSE-NEXT: jne LBB0_1
+; NOSSE-NEXT: ## %bb.2: ## %atomicrmw.end
+; NOSSE-NEXT: popq %rbx
+; NOSSE-NEXT: retq
%t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire
ret void
}
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE41-X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=AVX-X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=AVX-X64
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE2-X64
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE2-X64
define void @extract_i8_0(i8* nocapture %dst, <16 x i8> %foo) nounwind {
; SSE2-X32-LABEL: extract_i8_0:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vpextrb $0, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_i8_0:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movd %xmm0, %eax
-; SSE-F128-NEXT: movb %al, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <16 x i8> %foo, i32 0
store i8 %vecext, i8* %dst, align 1
ret void
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vpextrb $3, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_i8_3:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movd %xmm0, %eax
-; SSE-F128-NEXT: shrl $24, %eax
-; SSE-F128-NEXT: movb %al, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <16 x i8> %foo, i32 3
store i8 %vecext, i8* %dst, align 1
ret void
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vpextrb $15, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_i8_15:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: pextrw $7, %xmm0, %eax
-; SSE-F128-NEXT: movb %ah, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <16 x i8> %foo, i32 15
store i8 %vecext, i8* %dst, align 1
ret void
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_i16_0:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movd %xmm0, %eax
-; SSE-F128-NEXT: movw %ax, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <8 x i16> %foo, i32 0
store i16 %vecext, i16* %dst, align 1
ret void
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vpextrw $7, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_i16_7:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: pextrw $7, %xmm0, %eax
-; SSE-F128-NEXT: movw %ax, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <8 x i16> %foo, i32 7
store i16 %vecext, i16* %dst, align 1
ret void
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vextractps $3, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_i32_3:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE-F128-NEXT: movd %xmm0, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <4 x i32> %foo, i32 3
store i32 %vecext, i32* %dst, align 1
ret void
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vpextrq $1, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_i64_1:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE-F128-NEXT: movq %xmm0, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <2 x i64> %foo, i32 1
store i64 %vecext, i64* %dst, align 1
ret void
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vextractps $3, %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_f32_3:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE-F128-NEXT: movss %xmm0, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <4 x float> %foo, i32 3
store float %vecext, float* %dst, align 1
ret void
; SSE-X32-NEXT: popl %edi
; SSE-X32-NEXT: retl
;
-; SSE2-X64-LABEL: extract_f128_0:
-; SSE2-X64: # %bb.0:
-; SSE2-X64-NEXT: movq %rdx, 8(%rdi)
-; SSE2-X64-NEXT: movq %rsi, (%rdi)
-; SSE2-X64-NEXT: retq
-;
-; SSE41-X64-LABEL: extract_f128_0:
-; SSE41-X64: # %bb.0:
-; SSE41-X64-NEXT: movq %rdx, 8(%rdi)
-; SSE41-X64-NEXT: movq %rsi, (%rdi)
-; SSE41-X64-NEXT: retq
+; SSE-X64-LABEL: extract_f128_0:
+; SSE-X64: # %bb.0:
+; SSE-X64-NEXT: movups %xmm0, (%rdi)
+; SSE-X64-NEXT: retq
;
; AVX-X32-LABEL: extract_f128_0:
; AVX-X32: # %bb.0:
;
; AVX-X64-LABEL: extract_f128_0:
; AVX-X64: # %bb.0:
-; AVX-X64-NEXT: movq %rdx, 8(%rdi)
-; AVX-X64-NEXT: movq %rsi, (%rdi)
+; AVX-X64-NEXT: vmovups %xmm0, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_f128_0:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movups %xmm0, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <2 x fp128> %foo, i32 0
store fp128 %vecext, fp128* %dst, align 1
ret void
; SSE-X32-NEXT: popl %edi
; SSE-X32-NEXT: retl
;
-; SSE2-X64-LABEL: extract_f128_1:
-; SSE2-X64: # %bb.0:
-; SSE2-X64-NEXT: movq %r8, 8(%rdi)
-; SSE2-X64-NEXT: movq %rcx, (%rdi)
-; SSE2-X64-NEXT: retq
-;
-; SSE41-X64-LABEL: extract_f128_1:
-; SSE41-X64: # %bb.0:
-; SSE41-X64-NEXT: movq %r8, 8(%rdi)
-; SSE41-X64-NEXT: movq %rcx, (%rdi)
-; SSE41-X64-NEXT: retq
+; SSE-X64-LABEL: extract_f128_1:
+; SSE-X64: # %bb.0:
+; SSE-X64-NEXT: movups %xmm1, (%rdi)
+; SSE-X64-NEXT: retq
;
; AVX-X32-LABEL: extract_f128_1:
; AVX-X32: # %bb.0:
;
; AVX-X64-LABEL: extract_f128_1:
; AVX-X64: # %bb.0:
-; AVX-X64-NEXT: movq %r8, 8(%rdi)
-; AVX-X64-NEXT: movq %rcx, (%rdi)
+; AVX-X64-NEXT: vmovups %xmm1, (%rdi)
; AVX-X64-NEXT: retq
-;
-; SSE-F128-LABEL: extract_f128_1:
-; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movups %xmm1, (%rdi)
-; SSE-F128-NEXT: retq
%vecext = extractelement <2 x fp128> %foo, i32 1
store fp128 %vecext, fp128* %dst, align 1
ret void
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=-mmx | FileCheck %s --check-prefix=X64_NO_MMX
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefix=X64
; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+mmx | FileCheck %s --check-prefix=X32
; Check soft floating point conversion function calls.
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPExtF32_F128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64_NO_MMX-NEXT: callq __extendsftf2
-; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip)
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPExtF32_F128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPExtF64_F128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X64_NO_MMX-NEXT: callq __extenddftf2
-; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip)
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPExtF64_F128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPExtF80_F128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: subq $24, %rsp
-; X64_NO_MMX-NEXT: fldt {{.*}}(%rip)
-; X64_NO_MMX-NEXT: fstpt (%rsp)
-; X64_NO_MMX-NEXT: callq __extendxftf2
-; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip)
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: addq $24, %rsp
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPExtF80_F128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPToSIF128_I32:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi
-; X64_NO_MMX-NEXT: callq __fixtfsi
-; X64_NO_MMX-NEXT: movl %eax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPToSIF128_I32:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $12, %esp
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPToUIF128_U32:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi
-; X64_NO_MMX-NEXT: callq __fixunstfsi
-; X64_NO_MMX-NEXT: movl %eax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPToUIF128_U32:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $12, %esp
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPToSIF128_I64:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi
-; X64_NO_MMX-NEXT: callq __fixtfsi
-; X64_NO_MMX-NEXT: cltq
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPToSIF128_I64:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $12, %esp
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPToUIF128_U64:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi
-; X64_NO_MMX-NEXT: callq __fixunstfsi
-; X64_NO_MMX-NEXT: movl %eax, %eax
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPToUIF128_U64:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $12, %esp
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPTruncF128_F32:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi
-; X64_NO_MMX-NEXT: callq __trunctfsf2
-; X64_NO_MMX-NEXT: movss %xmm0, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPTruncF128_F32:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $12, %esp
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPTruncF128_F64:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi
-; X64_NO_MMX-NEXT: callq __trunctfdf2
-; X64_NO_MMX-NEXT: movsd %xmm0, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPTruncF128_F64:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $12, %esp
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestFPTruncF128_F80:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi
-; X64_NO_MMX-NEXT: callq __trunctfxf2
-; X64_NO_MMX-NEXT: fstpt {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestFPTruncF128_F80:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $12, %esp
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestSIToFPI32_F128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movl {{.*}}(%rip), %edi
-; X64_NO_MMX-NEXT: callq __floatsitf
-; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip)
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestSIToFPI32_F128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestUIToFPU32_F128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movl {{.*}}(%rip), %edi
-; X64_NO_MMX-NEXT: callq __floatunsitf
-; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip)
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestUIToFPU32_F128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestSIToFPI64_F128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: callq __floatditf
-; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip)
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestSIToFPI64_F128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X64-NEXT: popq %rax
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestUIToFPU64_F128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi
-; X64_NO_MMX-NEXT: callq __floatunditf
-; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip)
-; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip)
-; X64_NO_MMX-NEXT: popq %rax
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestUIToFPU64_F128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X64-NEXT: popq %rcx
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestConst128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movabsq $4611404543450677248, %rcx # imm = 0x3FFF000000000000
-; X64_NO_MMX-NEXT: xorl %edx, %edx
-; X64_NO_MMX-NEXT: callq __gttf2
-; X64_NO_MMX-NEXT: xorl %ecx, %ecx
-; X64_NO_MMX-NEXT: testl %eax, %eax
-; X64_NO_MMX-NEXT: setg %cl
-; X64_NO_MMX-NEXT: movl %ecx, %eax
-; X64_NO_MMX-NEXT: popq %rcx
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestConst128:
; X32: # %bb.0: # %entry
; X32-NEXT: subl $12, %esp
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestBits128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq %rdi, %rdx
-; X64_NO_MMX-NEXT: movq %rsi, %rcx
-; X64_NO_MMX-NEXT: callq __multf3
-; X64_NO_MMX-NEXT: movq %rax, %rdx
-; X64_NO_MMX-NEXT: shrq $32, %rdx
-; X64_NO_MMX-NEXT: xorl %ecx, %ecx
-; X64_NO_MMX-NEXT: orl %eax, %edx
-; X64_NO_MMX-NEXT: sete %cl
-; X64_NO_MMX-NEXT: movl %ecx, %eax
-; X64_NO_MMX-NEXT: popq %rcx
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestBits128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %edi
; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestPair128:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: movq %rsi, %rax
-; X64_NO_MMX-NEXT: addq $3, %rax
-; X64_NO_MMX-NEXT: adcq $0, %rdi
-; X64_NO_MMX-NEXT: movq %rdi, %rdx
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestPair128:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %edi
; X64-NEXT: .LBB17_2: # %cleanup
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: TestTruncCopysign:
-; X64_NO_MMX: # %bb.0: # %entry
-; X64_NO_MMX-NEXT: movl %edx, %ecx
-; X64_NO_MMX-NEXT: movq %rsi, %rdx
-; X64_NO_MMX-NEXT: movq %rdi, %rax
-; X64_NO_MMX-NEXT: cmpl $50001, %ecx # imm = 0xC351
-; X64_NO_MMX-NEXT: jl .LBB17_2
-; X64_NO_MMX-NEXT: # %bb.1: # %if.then
-; X64_NO_MMX-NEXT: pushq %rax
-; X64_NO_MMX-NEXT: movq %rax, %rdi
-; X64_NO_MMX-NEXT: movq %rdx, %rsi
-; X64_NO_MMX-NEXT: callq __trunctfdf2
-; X64_NO_MMX-NEXT: andps {{.*}}(%rip), %xmm0
-; X64_NO_MMX-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X64_NO_MMX-NEXT: orps %xmm1, %xmm0
-; X64_NO_MMX-NEXT: callq __extenddftf2
-; X64_NO_MMX-NEXT: addq $8, %rsp
-; X64_NO_MMX-NEXT: .LBB17_2: # %cleanup
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: TestTruncCopysign:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %edi
; X64-NEXT: sete %al
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: PR34866:
-; X64_NO_MMX: # %bb.0:
-; X64_NO_MMX-NEXT: orq %rsi, %rdi
-; X64_NO_MMX-NEXT: sete %al
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: PR34866:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
;
-; X64_NO_MMX-LABEL: PR34866_commute:
-; X64_NO_MMX: # %bb.0:
-; X64_NO_MMX-NEXT: orq %rsi, %rdi
-; X64_NO_MMX-NEXT: sete %al
-; X64_NO_MMX-NEXT: retq
-;
; X32-LABEL: PR34866_commute:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx \
-; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=MMX
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx \
-; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=MMX
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-android \
-; RUN: -enable-legalize-types-checking | FileCheck %s
-; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu \
-; RUN: -enable-legalize-types-checking | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse \
+; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse \
+; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=-sse \
+; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=NOSSE
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=-sse \
+; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=NOSSE
define void @test_select(fp128* %p, fp128* %q, i1 zeroext %c) {
-; MMX-LABEL: test_select:
-; MMX: # %bb.0:
-; MMX-NEXT: testl %edx, %edx
-; MMX-NEXT: jne .LBB0_1
-; MMX-NEXT: # %bb.2:
-; MMX-NEXT: movaps {{.*}}(%rip), %xmm0
-; MMX-NEXT: movaps %xmm0, (%rsi)
-; MMX-NEXT: retq
-; MMX-NEXT: .LBB0_1:
-; MMX-NEXT: movups (%rdi), %xmm0
-; MMX-NEXT: movaps %xmm0, (%rsi)
-; MMX-NEXT: retq
+; SSE-LABEL: test_select:
+; SSE: # %bb.0:
+; SSE-NEXT: testl %edx, %edx
+; SSE-NEXT: jne .LBB0_1
+; SSE-NEXT: # %bb.2:
+; SSE-NEXT: movaps {{.*}}(%rip), %xmm0
+; SSE-NEXT: movaps %xmm0, (%rsi)
+; SSE-NEXT: retq
+; SSE-NEXT: .LBB0_1:
+; SSE-NEXT: movups (%rdi), %xmm0
+; SSE-NEXT: movaps %xmm0, (%rsi)
+; SSE-NEXT: retq
;
-; CHECK-LABEL: test_select:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testl %edx, %edx
-; CHECK-NEXT: cmovneq (%rdi), %rax
-; CHECK-NEXT: movabsq $9223231299366420480, %rcx # imm = 0x7FFF800000000000
-; CHECK-NEXT: cmovneq 8(%rdi), %rcx
-; CHECK-NEXT: movq %rcx, 8(%rsi)
-; CHECK-NEXT: movq %rax, (%rsi)
-; CHECK-NEXT: retq
+; NOSSE-LABEL: test_select:
+; NOSSE: # %bb.0:
+; NOSSE-NEXT: xorl %eax, %eax
+; NOSSE-NEXT: testl %edx, %edx
+; NOSSE-NEXT: cmovneq (%rdi), %rax
+; NOSSE-NEXT: movabsq $9223231299366420480, %rcx # imm = 0x7FFF800000000000
+; NOSSE-NEXT: cmovneq 8(%rdi), %rcx
+; NOSSE-NEXT: movq %rcx, 8(%rsi)
+; NOSSE-NEXT: movq %rax, (%rsi)
+; NOSSE-NEXT: retq
%a = load fp128, fp128* %p, align 2
%r = select i1 %c, fp128 %a, fp128 0xL00000000000000007FFF800000000000
store fp128 %r, fp128* %q
define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
; SSE-LABEL: fptosi_2f128_to_4i32:
; SSE: # %bb.0:
-; SSE-NEXT: pushq %rbp
-; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
-; SSE-NEXT: movq %rcx, %r14
-; SSE-NEXT: movq %rdx, %rbx
+; SSE-NEXT: subq $16, %rsp
+; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
; SSE-NEXT: callq __fixtfsi
-; SSE-NEXT: movl %eax, %ebp
-; SSE-NEXT: movq %rbx, %rdi
-; SSE-NEXT: movq %r14, %rsi
+; SSE-NEXT: movl %eax, %ebx
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: callq __fixtfsi
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movd %ebp, %xmm1
+; SSE-NEXT: movd %ebx, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
+; SSE-NEXT: addq $16, %rsp
; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r14
-; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f128_to_4i32:
; AVX: # %bb.0:
-; AVX-NEXT: pushq %rbp
-; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
-; AVX-NEXT: movq %rcx, %r14
-; AVX-NEXT: movq %rdx, %rbx
+; AVX-NEXT: subq $16, %rsp
+; AVX-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill
; AVX-NEXT: callq __fixtfsi
-; AVX-NEXT: movl %eax, %ebp
-; AVX-NEXT: movq %rbx, %rdi
-; AVX-NEXT: movq %r14, %rsi
+; AVX-NEXT: movl %eax, %ebx
+; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX-NEXT: callq __fixtfsi
; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vmovd %ebp, %xmm1
+; AVX-NEXT: vmovd %ebx, %xmm1
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: addq $16, %rsp
; AVX-NEXT: popq %rbx
-; AVX-NEXT: popq %r14
-; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
%cvt = fptosi <2 x fp128> %a to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>