From 45cd1851097c61b7eee73b93fee4f09fc2a57d3c Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 2 Sep 2019 20:16:30 +0000
Subject: [PATCH] [X86] Enable fp128 as a legal type with SSE1 rather than
 with MMX.

FP128 values are passed in xmm registers, so they should be associated
with an SSE feature rather than MMX, which uses a different set of
registers.

llc enables sse1 and sse2 by default with x86_64, but does not enable
mmx. Clang enables all 3 features by default.

I've tried to add command lines to test with -sse where possible, but
any test that returns a value in an xmm register fails with a fatal
error with -sse since we have no defined ABI for that scenario.

llvm-svn: 370682
---
 llvm/lib/Target/X86/X86ISelLowering.cpp           |   4 +-
 llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll |  35 ++++
 llvm/test/CodeGen/X86/atomic-non-integer.ll       |  74 ++++++--
 llvm/test/CodeGen/X86/atomicf128.ll               |  32 +++-
 llvm/test/CodeGen/X86/extract-store.ll            |  99 ++--------
 llvm/test/CodeGen/X86/fp128-cast.ll               | 216 +---------------------
 llvm/test/CodeGen/X86/fp128-select.ll             |  60 +++---
 llvm/test/CodeGen/X86/vec_fp_to_int.ll            |  32 ++--
 8 files changed, 175 insertions(+), 377 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c188aa..f29d50f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -645,9 +645,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   setOperationAction(ISD::FMA, MVT::f64, Expand);
   setOperationAction(ISD::FMA, MVT::f32, Expand);
 
-  // Long double always uses X87, except f128 in MMX.
+  // Long double always uses X87, except f128 in SSE.
   if (UseX87) {
-    if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
+    if (Subtarget.is64Bit() && Subtarget.hasSSE1()) {
       addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                      : &X86::VR128RegClass);
       ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
diff --git a/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll b/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
new file mode 100644
index 0000000..a3028e9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=-sse | FileCheck %s --check-prefix=X64-NOSSE
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE
+
+; Note: This test is testing that the lowering for atomics matches what we
+; currently emit for non-atomics + the atomic restriction. The presence of
+; particular lowering detail in these tests should not be read as requiring
+; that detail for correctness unless it's related to the atomicity itself.
+; (Specifically, there were reviewer questions about the lowering for halfs
+; and their calling convention which remain unresolved.)
+ +define void @store_fp128(fp128* %fptr, fp128 %v) { +; X64-NOSSE-LABEL: store_fp128: +; X64-NOSSE: # %bb.0: +; X64-NOSSE-NEXT: pushq %rax +; X64-NOSSE-NEXT: .cfi_def_cfa_offset 16 +; X64-NOSSE-NEXT: callq __sync_lock_test_and_set_16 +; X64-NOSSE-NEXT: popq %rax +; X64-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X64-NOSSE-NEXT: retq +; +; X64-SSE-LABEL: store_fp128: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: subq $24, %rsp +; X64-SSE-NEXT: .cfi_def_cfa_offset 32 +; X64-SSE-NEXT: movaps %xmm0, (%rsp) +; X64-SSE-NEXT: movq (%rsp), %rsi +; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-SSE-NEXT: callq __sync_lock_test_and_set_16 +; X64-SSE-NEXT: addq $24, %rsp +; X64-SSE-NEXT: .cfi_def_cfa_offset 8 +; X64-SSE-NEXT: retq + store atomic fp128 %v, fp128* %fptr unordered, align 16 + ret void +} diff --git a/llvm/test/CodeGen/X86/atomic-non-integer.ll b/llvm/test/CodeGen/X86/atomic-non-integer.ll index d40eb76..8fd96b7 100644 --- a/llvm/test/CodeGen/X86/atomic-non-integer.ll +++ b/llvm/test/CodeGen/X86/atomic-non-integer.ll @@ -274,14 +274,29 @@ define void @store_fp128(fp128* %fptr, fp128 %v) { ; X86-NOSSE-NEXT: .cfi_adjust_cfa_offset -56 ; X86-NOSSE-NEXT: retl ; -; X64-LABEL: store_fp128: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: callq __sync_lock_test_and_set_16 -; X64-NEXT: popq %rax -; X64-NEXT: .cfi_def_cfa_offset 8 -; X64-NEXT: retq +; X64-SSE-LABEL: store_fp128: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: subq $24, %rsp +; X64-SSE-NEXT: .cfi_def_cfa_offset 32 +; X64-SSE-NEXT: movaps %xmm0, (%rsp) +; X64-SSE-NEXT: movq (%rsp), %rsi +; X64-SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-SSE-NEXT: callq __sync_lock_test_and_set_16 +; X64-SSE-NEXT: addq $24, %rsp +; X64-SSE-NEXT: .cfi_def_cfa_offset 8 +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: store_fp128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: subq $24, %rsp +; X64-AVX-NEXT: .cfi_def_cfa_offset 32 +; X64-AVX-NEXT: vmovaps %xmm0, (%rsp) +; X64-AVX-NEXT: movq (%rsp), %rsi +; X64-AVX-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-AVX-NEXT: callq __sync_lock_test_and_set_16 +; X64-AVX-NEXT: addq $24, %rsp +; X64-AVX-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX-NEXT: retq store atomic fp128 %v, fp128* %fptr unordered, align 16 ret void } @@ -636,18 +651,37 @@ define fp128 @load_fp128(fp128* %fptr) { ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 ; X86-NOSSE-NEXT: retl $4 ; -; X64-LABEL: load_fp128: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: xorl %esi, %esi -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: xorl %r8d, %r8d -; X64-NEXT: callq __sync_val_compare_and_swap_16 -; X64-NEXT: popq %rcx -; X64-NEXT: .cfi_def_cfa_offset 8 -; X64-NEXT: retq +; X64-SSE-LABEL: load_fp128: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: subq $24, %rsp +; X64-SSE-NEXT: .cfi_def_cfa_offset 32 +; X64-SSE-NEXT: xorl %esi, %esi +; X64-SSE-NEXT: xorl %edx, %edx +; X64-SSE-NEXT: xorl %ecx, %ecx +; X64-SSE-NEXT: xorl %r8d, %r8d +; X64-SSE-NEXT: callq __sync_val_compare_and_swap_16 +; X64-SSE-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-SSE-NEXT: movq %rax, (%rsp) +; X64-SSE-NEXT: movaps (%rsp), %xmm0 +; X64-SSE-NEXT: addq $24, %rsp +; X64-SSE-NEXT: .cfi_def_cfa_offset 8 +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: load_fp128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: subq $24, %rsp +; X64-AVX-NEXT: .cfi_def_cfa_offset 32 +; X64-AVX-NEXT: xorl %esi, %esi +; X64-AVX-NEXT: xorl %edx, %edx +; X64-AVX-NEXT: xorl %ecx, %ecx +; X64-AVX-NEXT: xorl %r8d, %r8d +; X64-AVX-NEXT: callq __sync_val_compare_and_swap_16 +; 
X64-AVX-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-AVX-NEXT: movq %rax, (%rsp) +; X64-AVX-NEXT: vmovaps (%rsp), %xmm0 +; X64-AVX-NEXT: addq $24, %rsp +; X64-AVX-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX-NEXT: retq %v = load atomic fp128, fp128* %fptr unordered, align 16 ret fp128 %v } diff --git a/llvm/test/CodeGen/X86/atomicf128.ll b/llvm/test/CodeGen/X86/atomicf128.ll index 6455fc5..cbec96c 100644 --- a/llvm/test/CodeGen/X86/atomicf128.ll +++ b/llvm/test/CodeGen/X86/atomicf128.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 -mattr=-sse | FileCheck %s --check-prefix=NOSSE ; FIXME: This test has a fatal error in 32-bit mode @@ -9,19 +10,42 @@ define void @atomic_fetch_swapf128(fp128 %x) nounwind { ; CHECK-LABEL: atomic_fetch_swapf128: ; CHECK: ## %bb.0: ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi -; CHECK-NEXT: movq (%rsi), %rax -; CHECK-NEXT: movq 8(%rsi), %rdx +; CHECK-NEXT: movaps (%rsi), %xmm1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_1: ## %atomicrmw.start ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rbx +; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: lock cmpxchg16b (%rsi) +; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 ; CHECK-NEXT: jne LBB0_1 ; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; NOSSE-LABEL: atomic_fetch_swapf128: +; NOSSE: ## %bb.0: +; NOSSE-NEXT: pushq %rbx +; NOSSE-NEXT: movq %rsi, %rcx +; NOSSE-NEXT: movq %rdi, %rbx +; NOSSE-NEXT: movq _fsc128@{{.*}}(%rip), %rsi +; NOSSE-NEXT: movq (%rsi), %rax +; NOSSE-NEXT: movq 8(%rsi), %rdx +; NOSSE-NEXT: .p2align 4, 0x90 +; NOSSE-NEXT: LBB0_1: ## %atomicrmw.start +; NOSSE-NEXT: ## =>This Inner Loop Header: Depth=1 +; NOSSE-NEXT: lock cmpxchg16b (%rsi) +; NOSSE-NEXT: jne LBB0_1 +; NOSSE-NEXT: ## %bb.2: ## %atomicrmw.end +; NOSSE-NEXT: popq %rbx +; NOSSE-NEXT: retq %t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire ret void } diff --git a/llvm/test/CodeGen/X86/extract-store.ll b/llvm/test/CodeGen/X86/extract-store.ll index 4d557c9..c29fac6 100644 --- a/llvm/test/CodeGen/X86/extract-store.ll +++ b/llvm/test/CodeGen/X86/extract-store.ll @@ -5,8 +5,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE41-X64 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=AVX-X32 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=AVX-X64 -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128 -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE-F128 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse -enable-legalize-types-checking | FileCheck %s 
--check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE2-X64 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse -enable-legalize-types-checking | FileCheck %s --check-prefix=X64 --check-prefix=SSE-X64 --check-prefix=SSE2-X64 define void @extract_i8_0(i8* nocapture %dst, <16 x i8> %foo) nounwind { ; SSE2-X32-LABEL: extract_i8_0: @@ -43,12 +43,6 @@ define void @extract_i8_0(i8* nocapture %dst, <16 x i8> %foo) nounwind { ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vpextrb $0, %xmm0, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_i8_0: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: movd %xmm0, %eax -; SSE-F128-NEXT: movb %al, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <16 x i8> %foo, i32 0 store i8 %vecext, i8* %dst, align 1 ret void @@ -91,13 +85,6 @@ define void @extract_i8_3(i8* nocapture %dst, <16 x i8> %foo) nounwind { ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vpextrb $3, %xmm0, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_i8_3: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: movd %xmm0, %eax -; SSE-F128-NEXT: shrl $24, %eax -; SSE-F128-NEXT: movb %al, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <16 x i8> %foo, i32 3 store i8 %vecext, i8* %dst, align 1 ret void @@ -138,12 +125,6 @@ define void @extract_i8_15(i8* nocapture %dst, <16 x i8> %foo) nounwind { ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vpextrb $15, %xmm0, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_i8_15: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: pextrw $7, %xmm0, %eax -; SSE-F128-NEXT: movb %ah, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <16 x i8> %foo, i32 15 store i8 %vecext, i8* %dst, align 1 ret void @@ -184,12 +165,6 @@ define void @extract_i16_0(i16* nocapture %dst, <8 x i16> %foo) nounwind { ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_i16_0: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: movd %xmm0, %eax -; SSE-F128-NEXT: movw %ax, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <8 x i16> %foo, i32 0 store i16 %vecext, i16* %dst, align 1 ret void @@ -230,12 +205,6 @@ define void @extract_i16_7(i16* nocapture %dst, <8 x i16> %foo) nounwind { ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vpextrw $7, %xmm0, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_i16_7: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: pextrw $7, %xmm0, %eax -; SSE-F128-NEXT: movw %ax, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <8 x i16> %foo, i32 7 store i16 %vecext, i16* %dst, align 1 ret void @@ -303,12 +272,6 @@ define void @extract_i32_3(i32* nocapture %dst, <4 x i32> %foo) nounwind { ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vextractps $3, %xmm0, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_i32_3: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE-F128-NEXT: movd %xmm0, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <4 x i32> %foo, i32 3 store i32 %vecext, i32* %dst, align 1 ret void @@ -371,12 +334,6 @@ define void @extract_i64_1(i64* nocapture %dst, <2 x i64> %foo) nounwind { ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vpextrq $1, %xmm0, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_i64_1: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; SSE-F128-NEXT: movq %xmm0, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <2 x i64> %foo, i32 1 store i64 %vecext, i64* %dst, align 1 ret void @@ -444,12 +401,6 @@ define void @extract_f32_3(float* nocapture %dst, <4 x float> %foo) nounwind { ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vextractps $3, %xmm0, 
(%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_f32_3: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE-F128-NEXT: movss %xmm0, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <4 x float> %foo, i32 3 store float %vecext, float* %dst, align 1 ret void @@ -527,17 +478,10 @@ define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind { ; SSE-X32-NEXT: popl %edi ; SSE-X32-NEXT: retl ; -; SSE2-X64-LABEL: extract_f128_0: -; SSE2-X64: # %bb.0: -; SSE2-X64-NEXT: movq %rdx, 8(%rdi) -; SSE2-X64-NEXT: movq %rsi, (%rdi) -; SSE2-X64-NEXT: retq -; -; SSE41-X64-LABEL: extract_f128_0: -; SSE41-X64: # %bb.0: -; SSE41-X64-NEXT: movq %rdx, 8(%rdi) -; SSE41-X64-NEXT: movq %rsi, (%rdi) -; SSE41-X64-NEXT: retq +; SSE-X64-LABEL: extract_f128_0: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movups %xmm0, (%rdi) +; SSE-X64-NEXT: retq ; ; AVX-X32-LABEL: extract_f128_0: ; AVX-X32: # %bb.0: @@ -548,14 +492,8 @@ define void @extract_f128_0(fp128* nocapture %dst, <2 x fp128> %foo) nounwind { ; ; AVX-X64-LABEL: extract_f128_0: ; AVX-X64: # %bb.0: -; AVX-X64-NEXT: movq %rdx, 8(%rdi) -; AVX-X64-NEXT: movq %rsi, (%rdi) +; AVX-X64-NEXT: vmovups %xmm0, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_f128_0: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: movups %xmm0, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <2 x fp128> %foo, i32 0 store fp128 %vecext, fp128* %dst, align 1 ret void @@ -579,17 +517,10 @@ define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind { ; SSE-X32-NEXT: popl %edi ; SSE-X32-NEXT: retl ; -; SSE2-X64-LABEL: extract_f128_1: -; SSE2-X64: # %bb.0: -; SSE2-X64-NEXT: movq %r8, 8(%rdi) -; SSE2-X64-NEXT: movq %rcx, (%rdi) -; SSE2-X64-NEXT: retq -; -; SSE41-X64-LABEL: extract_f128_1: -; SSE41-X64: # %bb.0: -; SSE41-X64-NEXT: movq %r8, 8(%rdi) -; SSE41-X64-NEXT: movq %rcx, (%rdi) -; SSE41-X64-NEXT: retq +; SSE-X64-LABEL: extract_f128_1: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movups %xmm1, (%rdi) +; SSE-X64-NEXT: retq ; ; AVX-X32-LABEL: extract_f128_1: ; AVX-X32: # %bb.0: @@ -600,14 +531,8 @@ define void @extract_f128_1(fp128* nocapture %dst, <2 x fp128> %foo) nounwind { ; ; AVX-X64-LABEL: extract_f128_1: ; AVX-X64: # %bb.0: -; AVX-X64-NEXT: movq %r8, 8(%rdi) -; AVX-X64-NEXT: movq %rcx, (%rdi) +; AVX-X64-NEXT: vmovups %xmm1, (%rdi) ; AVX-X64-NEXT: retq -; -; SSE-F128-LABEL: extract_f128_1: -; SSE-F128: # %bb.0: -; SSE-F128-NEXT: movups %xmm1, (%rdi) -; SSE-F128-NEXT: retq %vecext = extractelement <2 x fp128> %foo, i32 1 store fp128 %vecext, fp128* %dst, align 1 ret void diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll index f58bee3..8f91671 100644 --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=-mmx | FileCheck %s --check-prefix=X64_NO_MMX +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+mmx | FileCheck %s --check-prefix=X32 ; Check soft floating point conversion function calls. 
@@ -25,16 +24,6 @@ define void @TestFPExtF32_F128() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPExtF32_F128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64_NO_MMX-NEXT: callq __extendsftf2 -; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip) -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPExtF32_F128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi @@ -73,16 +62,6 @@ define void @TestFPExtF64_F128() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPExtF64_F128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64_NO_MMX-NEXT: callq __extenddftf2 -; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip) -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPExtF64_F128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi @@ -122,17 +101,6 @@ define void @TestFPExtF80_F128() nounwind { ; X64-NEXT: addq $24, %rsp ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPExtF80_F128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: subq $24, %rsp -; X64_NO_MMX-NEXT: fldt {{.*}}(%rip) -; X64_NO_MMX-NEXT: fstpt (%rsp) -; X64_NO_MMX-NEXT: callq __extendxftf2 -; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip) -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: addq $24, %rsp -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPExtF80_F128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi @@ -171,16 +139,6 @@ define void @TestFPToSIF128_I32() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPToSIF128_I32: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi -; X64_NO_MMX-NEXT: callq __fixtfsi -; X64_NO_MMX-NEXT: movl %eax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPToSIF128_I32: ; X32: # %bb.0: # %entry ; X32-NEXT: subl $12, %esp @@ -210,16 +168,6 @@ define void @TestFPToUIF128_U32() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPToUIF128_U32: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi -; X64_NO_MMX-NEXT: callq __fixunstfsi -; X64_NO_MMX-NEXT: movl %eax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPToUIF128_U32: ; X32: # %bb.0: # %entry ; X32-NEXT: subl $12, %esp @@ -250,17 +198,6 @@ define void @TestFPToSIF128_I64() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPToSIF128_I64: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi -; X64_NO_MMX-NEXT: callq __fixtfsi -; X64_NO_MMX-NEXT: cltq -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPToSIF128_I64: ; X32: # %bb.0: # %entry ; X32-NEXT: subl $12, %esp @@ -294,17 +231,6 @@ define void @TestFPToUIF128_U64() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPToUIF128_U64: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: movq 
vf128+{{.*}}(%rip), %rsi -; X64_NO_MMX-NEXT: callq __fixunstfsi -; X64_NO_MMX-NEXT: movl %eax, %eax -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPToUIF128_U64: ; X32: # %bb.0: # %entry ; X32-NEXT: subl $12, %esp @@ -336,16 +262,6 @@ define void @TestFPTruncF128_F32() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPTruncF128_F32: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi -; X64_NO_MMX-NEXT: callq __trunctfsf2 -; X64_NO_MMX-NEXT: movss %xmm0, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPTruncF128_F32: ; X32: # %bb.0: # %entry ; X32-NEXT: subl $12, %esp @@ -375,16 +291,6 @@ define void @TestFPTruncF128_F64() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPTruncF128_F64: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi -; X64_NO_MMX-NEXT: callq __trunctfdf2 -; X64_NO_MMX-NEXT: movsd %xmm0, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPTruncF128_F64: ; X32: # %bb.0: # %entry ; X32-NEXT: subl $12, %esp @@ -418,16 +324,6 @@ define void @TestFPTruncF128_F80() nounwind { ; X64-NEXT: addq $24, %rsp ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestFPTruncF128_F80: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: movq vf128+{{.*}}(%rip), %rsi -; X64_NO_MMX-NEXT: callq __trunctfxf2 -; X64_NO_MMX-NEXT: fstpt {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestFPTruncF128_F80: ; X32: # %bb.0: # %entry ; X32-NEXT: subl $12, %esp @@ -457,16 +353,6 @@ define void @TestSIToFPI32_F128() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestSIToFPI32_F128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movl {{.*}}(%rip), %edi -; X64_NO_MMX-NEXT: callq __floatsitf -; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip) -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestSIToFPI32_F128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi @@ -504,16 +390,6 @@ define void @TestUIToFPU32_F128() #2 { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestUIToFPU32_F128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movl {{.*}}(%rip), %edi -; X64_NO_MMX-NEXT: callq __floatunsitf -; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip) -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestUIToFPU32_F128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi @@ -551,16 +427,6 @@ define void @TestSIToFPI64_F128() nounwind { ; X64-NEXT: popq %rax ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestSIToFPI64_F128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: callq __floatditf -; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip) -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestSIToFPI64_F128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi @@ -599,16 +465,6 @@ define void @TestUIToFPU64_F128() #2 { ; X64-NEXT: popq %rax 
; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestUIToFPU64_F128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq {{.*}}(%rip), %rdi -; X64_NO_MMX-NEXT: callq __floatunditf -; X64_NO_MMX-NEXT: movq %rdx, vf128+{{.*}}(%rip) -; X64_NO_MMX-NEXT: movq %rax, {{.*}}(%rip) -; X64_NO_MMX-NEXT: popq %rax -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestUIToFPU64_F128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi @@ -650,19 +506,6 @@ define i32 @TestConst128(fp128 %v) nounwind { ; X64-NEXT: popq %rcx ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestConst128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movabsq $4611404543450677248, %rcx # imm = 0x3FFF000000000000 -; X64_NO_MMX-NEXT: xorl %edx, %edx -; X64_NO_MMX-NEXT: callq __gttf2 -; X64_NO_MMX-NEXT: xorl %ecx, %ecx -; X64_NO_MMX-NEXT: testl %eax, %eax -; X64_NO_MMX-NEXT: setg %cl -; X64_NO_MMX-NEXT: movl %ecx, %eax -; X64_NO_MMX-NEXT: popq %rcx -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestConst128: ; X32: # %bb.0: # %entry ; X32-NEXT: subl $12, %esp @@ -718,21 +561,6 @@ define i32 @TestBits128(fp128 %ld) nounwind { ; X64-NEXT: addq $24, %rsp ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestBits128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq %rdi, %rdx -; X64_NO_MMX-NEXT: movq %rsi, %rcx -; X64_NO_MMX-NEXT: callq __multf3 -; X64_NO_MMX-NEXT: movq %rax, %rdx -; X64_NO_MMX-NEXT: shrq $32, %rdx -; X64_NO_MMX-NEXT: xorl %ecx, %ecx -; X64_NO_MMX-NEXT: orl %eax, %edx -; X64_NO_MMX-NEXT: sete %cl -; X64_NO_MMX-NEXT: movl %ecx, %eax -; X64_NO_MMX-NEXT: popq %rcx -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestBits128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %edi @@ -798,14 +626,6 @@ define fp128 @TestPair128(i64 %a, i64 %b) nounwind { ; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestPair128: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: movq %rsi, %rax -; X64_NO_MMX-NEXT: addq $3, %rax -; X64_NO_MMX-NEXT: adcq $0, %rdi -; X64_NO_MMX-NEXT: movq %rdi, %rdx -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestPair128: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %edi @@ -852,26 +672,6 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind { ; X64-NEXT: .LBB17_2: # %cleanup ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: TestTruncCopysign: -; X64_NO_MMX: # %bb.0: # %entry -; X64_NO_MMX-NEXT: movl %edx, %ecx -; X64_NO_MMX-NEXT: movq %rsi, %rdx -; X64_NO_MMX-NEXT: movq %rdi, %rax -; X64_NO_MMX-NEXT: cmpl $50001, %ecx # imm = 0xC351 -; X64_NO_MMX-NEXT: jl .LBB17_2 -; X64_NO_MMX-NEXT: # %bb.1: # %if.then -; X64_NO_MMX-NEXT: pushq %rax -; X64_NO_MMX-NEXT: movq %rax, %rdi -; X64_NO_MMX-NEXT: movq %rdx, %rsi -; X64_NO_MMX-NEXT: callq __trunctfdf2 -; X64_NO_MMX-NEXT: andps {{.*}}(%rip), %xmm0 -; X64_NO_MMX-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X64_NO_MMX-NEXT: orps %xmm1, %xmm0 -; X64_NO_MMX-NEXT: callq __extenddftf2 -; X64_NO_MMX-NEXT: addq $8, %rsp -; X64_NO_MMX-NEXT: .LBB17_2: # %cleanup -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: TestTruncCopysign: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %edi @@ -947,12 +747,6 @@ define i1 @PR34866(i128 %x) nounwind { ; X64-NEXT: sete %al ; X64-NEXT: retq ; -; X64_NO_MMX-LABEL: PR34866: -; X64_NO_MMX: # %bb.0: -; X64_NO_MMX-NEXT: orq %rsi, %rdi -; X64_NO_MMX-NEXT: sete %al -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: PR34866: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -978,12 +772,6 @@ define i1 @PR34866_commute(i128 %x) nounwind { ; X64-NEXT: sete %al ; X64-NEXT: retq ; 
-; X64_NO_MMX-LABEL: PR34866_commute: -; X64_NO_MMX: # %bb.0: -; X64_NO_MMX-NEXT: orq %rsi, %rdi -; X64_NO_MMX-NEXT: sete %al -; X64_NO_MMX-NEXT: retq -; ; X32-LABEL: PR34866_commute: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/fp128-select.ll b/llvm/test/CodeGen/X86/fp128-select.ll index 503c7a9..134f1f3 100644 --- a/llvm/test/CodeGen/X86/fp128-select.ll +++ b/llvm/test/CodeGen/X86/fp128-select.ll @@ -1,37 +1,37 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx \ -; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=MMX -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx \ -; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=MMX -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android \ -; RUN: -enable-legalize-types-checking | FileCheck %s -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu \ -; RUN: -enable-legalize-types-checking | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+sse \ +; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+sse \ +; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=-sse \ +; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=NOSSE +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=-sse \ +; RUN: -enable-legalize-types-checking | FileCheck %s --check-prefix=NOSSE define void @test_select(fp128* %p, fp128* %q, i1 zeroext %c) { -; MMX-LABEL: test_select: -; MMX: # %bb.0: -; MMX-NEXT: testl %edx, %edx -; MMX-NEXT: jne .LBB0_1 -; MMX-NEXT: # %bb.2: -; MMX-NEXT: movaps {{.*}}(%rip), %xmm0 -; MMX-NEXT: movaps %xmm0, (%rsi) -; MMX-NEXT: retq -; MMX-NEXT: .LBB0_1: -; MMX-NEXT: movups (%rdi), %xmm0 -; MMX-NEXT: movaps %xmm0, (%rsi) -; MMX-NEXT: retq +; SSE-LABEL: test_select: +; SSE: # %bb.0: +; SSE-NEXT: testl %edx, %edx +; SSE-NEXT: jne .LBB0_1 +; SSE-NEXT: # %bb.2: +; SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps %xmm0, (%rsi) +; SSE-NEXT: retq +; SSE-NEXT: .LBB0_1: +; SSE-NEXT: movups (%rdi), %xmm0 +; SSE-NEXT: movaps %xmm0, (%rsi) +; SSE-NEXT: retq ; -; CHECK-LABEL: test_select: -; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edx, %edx -; CHECK-NEXT: cmovneq (%rdi), %rax -; CHECK-NEXT: movabsq $9223231299366420480, %rcx # imm = 0x7FFF800000000000 -; CHECK-NEXT: cmovneq 8(%rdi), %rcx -; CHECK-NEXT: movq %rcx, 8(%rsi) -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: retq +; NOSSE-LABEL: test_select: +; NOSSE: # %bb.0: +; NOSSE-NEXT: xorl %eax, %eax +; NOSSE-NEXT: testl %edx, %edx +; NOSSE-NEXT: cmovneq (%rdi), %rax +; NOSSE-NEXT: movabsq $9223231299366420480, %rcx # imm = 0x7FFF800000000000 +; NOSSE-NEXT: cmovneq 8(%rdi), %rcx +; NOSSE-NEXT: movq %rcx, 8(%rsi) +; NOSSE-NEXT: movq %rax, (%rsi) +; NOSSE-NEXT: retq %a = load fp128, fp128* %p, align 2 %r = select i1 %c, fp128 %a, fp128 0xL00000000000000007FFF800000000000 store fp128 %r, fp128* %q diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll index b408651..fc32333 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -2259,44 +2259,36 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind { define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind { ; SSE-LABEL: fptosi_2f128_to_4i32: ; SSE: # %bb.0: -; SSE-NEXT: 
pushq %rbp
-; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
-; SSE-NEXT: movq %rcx, %r14
-; SSE-NEXT: movq %rdx, %rbx
+; SSE-NEXT: subq $16, %rsp
+; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
; SSE-NEXT: callq __fixtfsi
-; SSE-NEXT: movl %eax, %ebp
-; SSE-NEXT: movq %rbx, %rdi
-; SSE-NEXT: movq %r14, %rsi
+; SSE-NEXT: movl %eax, %ebx
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: callq __fixtfsi
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movd %ebp, %xmm1
+; SSE-NEXT: movd %ebx, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
+; SSE-NEXT: addq $16, %rsp
; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r14
-; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f128_to_4i32:
; AVX: # %bb.0:
-; AVX-NEXT: pushq %rbp
-; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
-; AVX-NEXT: movq %rcx, %r14
-; AVX-NEXT: movq %rdx, %rbx
+; AVX-NEXT: subq $16, %rsp
+; AVX-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill
; AVX-NEXT: callq __fixtfsi
-; AVX-NEXT: movl %eax, %ebp
-; AVX-NEXT: movq %rbx, %rdi
-; AVX-NEXT: movq %r14, %rsi
+; AVX-NEXT: movl %eax, %ebx
+; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX-NEXT: callq __fixtfsi
; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vmovd %ebp, %xmm1
+; AVX-NEXT: vmovd %ebx, %xmm1
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: addq $16, %rsp
; AVX-NEXT: popq %rbx
-; AVX-NEXT: popq %r14
-; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
%cvt = fptosi <2 x fp128> %a to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-- 
2.7.4
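
The sketch below is not part of the commit; it sits after the mail signature,
where git am ignores it. It illustrates the ABI point from the commit message:
with SSE1 available, an fp128 load and return select to a single xmm move,
while with -sse llc hits the fatal error mentioned above because there is no
defined way to return the value. The file name, the CHECK lines, and the exact
error text are assumptions, not output captured from the commit.

; fp128-xmm-abi-sketch.ll (hypothetical file, not in the patch)
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s
; RUN: not llc < %s -mtriple=x86_64-linux-gnu -mattr=-sse 2>&1 | FileCheck %s --check-prefix=NOSSE

; With fp128 legal in the VR128 class, the aligned load and the return both
; use %xmm0 and no libcall is needed. (Expected output; regenerate with
; utils/update_llc_test_checks.py rather than trusting these lines.)
; CHECK-LABEL: ret_fp128:
; CHECK: movaps (%rdi), %xmm0
; CHECK-NEXT: retq

; With -sse the return has no ABI, so llc should die with a fatal error
; instead of selecting code (message prefix assumed).
; NOSSE: LLVM ERROR
define fp128 @ret_fp128(fp128* %p) {
  %v = load fp128, fp128* %p, align 16
  ret fp128 %v
}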