; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
ret <2 x i64> %ret
}
+define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(<4 x float>* %x) strictfp { ; Strict signed fp-to-int conversion of the two low floats of a <4 x float> loaded from %x.
+; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; SSE-32: # %bb.0:
+; SSE-32-NEXT: pushl %ebp
+; SSE-32-NEXT: .cfi_def_cfa_offset 8
+; SSE-32-NEXT: .cfi_offset %ebp, -8
+; SSE-32-NEXT: movl %esp, %ebp
+; SSE-32-NEXT: .cfi_def_cfa_register %ebp
+; SSE-32-NEXT: andl $-8, %esp
+; SSE-32-NEXT: subl $24, %esp
+; SSE-32-NEXT: movl 8(%ebp), %eax
+; SSE-32-NEXT: movaps (%eax), %xmm0
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
+; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
+; SSE-32-NEXT: fnstcw (%esp)
+; SSE-32-NEXT: movzwl (%esp), %eax
+; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw (%esp)
+; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-32-NEXT: movl %ebp, %esp
+; SSE-32-NEXT: popl %ebp
+; SSE-32-NEXT: .cfi_def_cfa %esp, 4
+; SSE-32-NEXT: retl
+;
+; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; SSE-64: # %bb.0:
+; SSE-64-NEXT: movaps (%rdi), %xmm1
+; SSE-64-NEXT: cvttss2si %xmm1, %rax
+; SSE-64-NEXT: movq %rax, %xmm0
+; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE-64-NEXT: cvttss2si %xmm1, %rax
+; SSE-64-NEXT: movq %rax, %xmm1
+; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-64-NEXT: retq
+;
+; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: pushl %ebp
+; AVX-32-NEXT: .cfi_def_cfa_offset 8
+; AVX-32-NEXT: .cfi_offset %ebp, -8
+; AVX-32-NEXT: movl %esp, %ebp
+; AVX-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX-32-NEXT: andl $-8, %esp
+; AVX-32-NEXT: subl $16, %esp
+; AVX-32-NEXT: movl 8(%ebp), %eax
+; AVX-32-NEXT: vmovaps (%eax), %xmm0
+; AVX-32-NEXT: vmovss %xmm0, (%esp)
+; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: flds (%esp)
+; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
+; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: movl %ebp, %esp
+; AVX-32-NEXT: popl %ebp
+; AVX-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vcvttss2si 4(%rdi), %rax
+; AVX-64-NEXT: vmovq %rax, %xmm0
+; AVX-64-NEXT: vcvttss2si (%rdi), %rax
+; AVX-64-NEXT: vmovq %rax, %xmm1
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-64-NEXT: retq
+;
+; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $16, %esp
+; AVX512F-32-NEXT: movl 8(%ebp), %eax
+; AVX512F-32-NEXT: vmovdqa (%eax), %xmm0
+; AVX512F-32-NEXT: vmovd %xmm0, (%esp)
+; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vcvttss2si 4(%rdi), %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vcvttss2si (%rdi), %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-64-NEXT: retq
+;
+; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512VL-32: # %bb.0:
+; AVX512VL-32-NEXT: pushl %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
+; AVX512VL-32-NEXT: movl %esp, %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512VL-32-NEXT: andl $-8, %esp
+; AVX512VL-32-NEXT: subl $16, %esp
+; AVX512VL-32-NEXT: movl 8(%ebp), %eax
+; AVX512VL-32-NEXT: vmovdqa (%eax), %xmm0
+; AVX512VL-32-NEXT: vmovd %xmm0, (%esp)
+; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: flds (%esp)
+; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
+; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512VL-32-NEXT: movl %ebp, %esp
+; AVX512VL-32-NEXT: popl %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512VL-32-NEXT: retl
+;
+; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512VL-64: # %bb.0:
+; AVX512VL-64-NEXT: vcvttss2si 4(%rdi), %rax
+; AVX512VL-64-NEXT: vmovq %rax, %xmm0
+; AVX512VL-64-NEXT: vcvttss2si (%rdi), %rax
+; AVX512VL-64-NEXT: vmovq %rax, %xmm1
+; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-64-NEXT: retq
+;
+; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512DQ-32: # %bb.0:
+; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
+; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-32-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-32-NEXT: vzeroupper
+; AVX512DQ-32-NEXT: retl
+;
+; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512DQ-64: # %bb.0:
+; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-64-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-64-NEXT: vzeroupper
+; AVX512DQ-64-NEXT: retq
+;
+; AVX512VLDQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512VLDQ-32: # %bb.0:
+; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512VLDQ-32-NEXT: vcvttps2qq (%eax), %xmm0
+; AVX512VLDQ-32-NEXT: retl
+;
+; AVX512VLDQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512VLDQ-64: # %bb.0:
+; AVX512VLDQ-64-NEXT: vcvttps2qq (%rdi), %xmm0
+; AVX512VLDQ-64-NEXT: retq
+ %a = load <4 x float>, <4 x float>* %x ; full <4 x float> load; the avx512dq,avx512vl runs above expect it folded into the memory operand of vcvttps2qq
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; keep only elements 0 and 1 of %a
+ %c = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0 ; constrained signed conversion to <2 x i64>; attribute group #0 is defined outside this view
+ ret <2 x i64> %c
+}
+
define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; SSE-32: # %bb.0:
; SSE-32-NEXT: comiss %xmm2, %xmm0
; SSE-32-NEXT: xorps %xmm1, %xmm1
; SSE-32-NEXT: xorps %xmm3, %xmm3
-; SSE-32-NEXT: jb .LBB3_2
+; SSE-32-NEXT: jb .LBB4_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm2, %xmm3
-; SSE-32-NEXT: .LBB3_2:
+; SSE-32-NEXT: .LBB4_2:
; SSE-32-NEXT: movaps %xmm0, %xmm4
; SSE-32-NEXT: subss %xmm3, %xmm4
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: comiss %xmm2, %xmm0
-; SSE-32-NEXT: jb .LBB3_4
+; SSE-32-NEXT: jb .LBB4_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movaps %xmm2, %xmm1
-; SSE-32-NEXT: .LBB3_4:
+; SSE-32-NEXT: .LBB4_4:
; SSE-32-NEXT: subss %xmm1, %xmm0
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
; SSE-64-NEXT: comiss %xmm3, %xmm0
; SSE-64-NEXT: xorps %xmm2, %xmm2
; SSE-64-NEXT: xorps %xmm1, %xmm1
-; SSE-64-NEXT: jb .LBB3_2
+; SSE-64-NEXT: jb .LBB4_2
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: movaps %xmm3, %xmm1
-; SSE-64-NEXT: .LBB3_2:
+; SSE-64-NEXT: .LBB4_2:
; SSE-64-NEXT: movaps %xmm0, %xmm4
; SSE-64-NEXT: subss %xmm1, %xmm4
; SSE-64-NEXT: cvttss2si %xmm4, %rax
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-64-NEXT: comiss %xmm3, %xmm0
-; SSE-64-NEXT: jb .LBB3_4
+; SSE-64-NEXT: jb .LBB4_4
; SSE-64-NEXT: # %bb.3:
; SSE-64-NEXT: movaps %xmm3, %xmm2
-; SSE-64-NEXT: .LBB3_4:
+; SSE-64-NEXT: .LBB4_4:
; SSE-64-NEXT: subss %xmm2, %xmm0
; SSE-64-NEXT: cvttss2si %xmm0, %rax
; SSE-64-NEXT: setae %cl
; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX-32-NEXT: jb .LBB3_2
+; AVX-32-NEXT: jb .LBB4_2
; AVX-32-NEXT: # %bb.1:
; AVX-32-NEXT: vmovaps %xmm1, %xmm4
-; AVX-32-NEXT: .LBB3_2:
+; AVX-32-NEXT: .LBB4_2:
; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vcomiss %xmm1, %xmm0
-; AVX-32-NEXT: jb .LBB3_4
+; AVX-32-NEXT: jb .LBB4_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
-; AVX-32-NEXT: .LBB3_4:
+; AVX-32-NEXT: .LBB4_4:
; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-32-NEXT: vmovss %xmm0, (%esp)
; AVX-32-NEXT: flds (%esp)
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-64-NEXT: jb .LBB3_2
+; AVX-64-NEXT: jb .LBB4_2
; AVX-64-NEXT: # %bb.1:
; AVX-64-NEXT: vmovaps %xmm1, %xmm3
-; AVX-64-NEXT: .LBB3_2:
+; AVX-64-NEXT: .LBB4_2:
; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
; AVX-64-NEXT: vcvttss2si %xmm3, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
-; AVX-64-NEXT: jb .LBB3_4
+; AVX-64-NEXT: jb .LBB4_4
; AVX-64-NEXT: # %bb.3:
; AVX-64-NEXT: vmovaps %xmm1, %xmm2
-; AVX-64-NEXT: .LBB3_4:
+; AVX-64-NEXT: .LBB4_4:
; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
; AVX-64-NEXT: setae %cl
ret <2 x i64> %ret
}
+define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(<4 x float>* %x) strictfp { ; Strict unsigned fp-to-int conversion of the two low floats of a <4 x float> loaded from %x.
+; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; SSE-32: # %bb.0:
+; SSE-32-NEXT: pushl %ebp
+; SSE-32-NEXT: .cfi_def_cfa_offset 8
+; SSE-32-NEXT: .cfi_offset %ebp, -8
+; SSE-32-NEXT: movl %esp, %ebp
+; SSE-32-NEXT: .cfi_def_cfa_register %ebp
+; SSE-32-NEXT: andl $-8, %esp
+; SSE-32-NEXT: subl $24, %esp
+; SSE-32-NEXT: movl 8(%ebp), %eax
+; SSE-32-NEXT: movaps (%eax), %xmm0
+; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: xorps %xmm3, %xmm3
+; SSE-32-NEXT: jb .LBB5_2
+; SSE-32-NEXT: # %bb.1:
+; SSE-32-NEXT: movaps %xmm2, %xmm3
+; SSE-32-NEXT: .LBB5_2:
+; SSE-32-NEXT: movaps %xmm0, %xmm4
+; SSE-32-NEXT: subss %xmm3, %xmm4
+; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
+; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: jb .LBB5_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm2, %xmm1
+; SSE-32-NEXT: .LBB5_4:
+; SSE-32-NEXT: subss %xmm1, %xmm0
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %cl
+; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
+; SSE-32-NEXT: fnstcw (%esp)
+; SSE-32-NEXT: movzwl (%esp), %edx
+; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
+; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw (%esp)
+; SSE-32-NEXT: movzbl %al, %eax
+; SSE-32-NEXT: shll $31, %eax
+; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; SSE-32-NEXT: movd %eax, %xmm1
+; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-32-NEXT: movzbl %cl, %eax
+; SSE-32-NEXT: shll $31, %eax
+; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; SSE-32-NEXT: movd %eax, %xmm1
+; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-32-NEXT: movl %ebp, %esp
+; SSE-32-NEXT: popl %ebp
+; SSE-32-NEXT: .cfi_def_cfa %esp, 4
+; SSE-32-NEXT: retl
+;
+; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; SSE-64: # %bb.0:
+; SSE-64-NEXT: movaps (%rdi), %xmm1
+; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-64-NEXT: comiss %xmm3, %xmm1
+; SSE-64-NEXT: xorps %xmm2, %xmm2
+; SSE-64-NEXT: xorps %xmm0, %xmm0
+; SSE-64-NEXT: jb .LBB5_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movaps %xmm3, %xmm0
+; SSE-64-NEXT: .LBB5_2:
+; SSE-64-NEXT: movaps %xmm1, %xmm4
+; SSE-64-NEXT: subss %xmm0, %xmm4
+; SSE-64-NEXT: cvttss2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
+; SSE-64-NEXT: xorq %rax, %rcx
+; SSE-64-NEXT: movq %rcx, %xmm0
+; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE-64-NEXT: comiss %xmm3, %xmm1
+; SSE-64-NEXT: jb .LBB5_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movaps %xmm3, %xmm2
+; SSE-64-NEXT: .LBB5_4:
+; SSE-64-NEXT: subss %xmm2, %xmm1
+; SSE-64-NEXT: cvttss2si %xmm1, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
+; SSE-64-NEXT: xorq %rax, %rcx
+; SSE-64-NEXT: movq %rcx, %xmm1
+; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-64-NEXT: retq
+;
+; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: pushl %ebp
+; AVX-32-NEXT: .cfi_def_cfa_offset 8
+; AVX-32-NEXT: .cfi_offset %ebp, -8
+; AVX-32-NEXT: movl %esp, %ebp
+; AVX-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX-32-NEXT: andl $-8, %esp
+; AVX-32-NEXT: subl $16, %esp
+; AVX-32-NEXT: movl 8(%ebp), %eax
+; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
+; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-32-NEXT: jb .LBB5_2
+; AVX-32-NEXT: # %bb.1:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX-32-NEXT: .LBB5_2:
+; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
+; AVX-32-NEXT: setae %al
+; AVX-32-NEXT: movzbl %al, %eax
+; AVX-32-NEXT: shll $31, %eax
+; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX-32-NEXT: vcomiss %xmm1, %xmm0
+; AVX-32-NEXT: jb .LBB5_4
+; AVX-32-NEXT: # %bb.3:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm2
+; AVX-32-NEXT: .LBB5_4:
+; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-32-NEXT: vmovss %xmm0, (%esp)
+; AVX-32-NEXT: flds (%esp)
+; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
+; AVX-32-NEXT: setae %cl
+; AVX-32-NEXT: movzbl %cl, %ecx
+; AVX-32-NEXT: shll $31, %ecx
+; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-32-NEXT: movl %ebp, %esp
+; AVX-32-NEXT: popl %ebp
+; AVX-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vcomiss %xmm1, %xmm3
+; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-64-NEXT: jb .LBB5_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm4
+; AVX-64-NEXT: .LBB5_2:
+; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX-64-NEXT: vcvttss2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
+; AVX-64-NEXT: xorq %rax, %rcx
+; AVX-64-NEXT: vmovq %rcx, %xmm3
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB5_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm2
+; AVX-64-NEXT: .LBB5_4:
+; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
+; AVX-64-NEXT: xorq %rax, %rcx
+; AVX-64-NEXT: vmovq %rcx, %xmm0
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX-64-NEXT: retq
+;
+; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $16, %esp
+; AVX512F-32-NEXT: movl 8(%ebp), %eax
+; AVX512F-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: xorl %eax, %eax
+; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1
+; AVX512F-32-NEXT: setb %cl
+; AVX512F-32-NEXT: kmovw %ecx, %k1
+; AVX512F-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX512F-32-NEXT: vmovaps %xmm2, %xmm4
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
+; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
+; AVX512F-32-NEXT: setae %al
+; AVX512F-32-NEXT: shll $31, %eax
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: xorl %ecx, %ecx
+; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0
+; AVX512F-32-NEXT: setb %dl
+; AVX512F-32-NEXT: kmovw %edx, %k1
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX512F-32-NEXT: vmovss %xmm0, (%esp)
+; AVX512F-32-NEXT: flds (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
+; AVX512F-32-NEXT: setae %cl
+; AVX512F-32-NEXT: shll $31, %ecx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vcvttss2usi 4(%rdi), %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vcvttss2usi (%rdi), %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-64-NEXT: retq
+;
+; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512VL-32: # %bb.0:
+; AVX512VL-32-NEXT: pushl %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
+; AVX512VL-32-NEXT: movl %esp, %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512VL-32-NEXT: andl $-8, %esp
+; AVX512VL-32-NEXT: subl $16, %esp
+; AVX512VL-32-NEXT: movl 8(%ebp), %eax
+; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: xorl %eax, %eax
+; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1
+; AVX512VL-32-NEXT: setb %cl
+; AVX512VL-32-NEXT: kmovw %ecx, %k1
+; AVX512VL-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX512VL-32-NEXT: vmovaps %xmm2, %xmm4
+; AVX512VL-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512VL-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
+; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
+; AVX512VL-32-NEXT: setae %al
+; AVX512VL-32-NEXT: shll $31, %eax
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT: xorl %ecx, %ecx
+; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0
+; AVX512VL-32-NEXT: setb %dl
+; AVX512VL-32-NEXT: kmovw %edx, %k1
+; AVX512VL-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512VL-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vmovss %xmm0, (%esp)
+; AVX512VL-32-NEXT: flds (%esp)
+; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
+; AVX512VL-32-NEXT: setae %cl
+; AVX512VL-32-NEXT: shll $31, %ecx
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX512VL-32-NEXT: movl %ebp, %esp
+; AVX512VL-32-NEXT: popl %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512VL-32-NEXT: retl
+;
+; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512VL-64: # %bb.0:
+; AVX512VL-64-NEXT: vcvttss2usi 4(%rdi), %rax
+; AVX512VL-64-NEXT: vmovq %rax, %xmm0
+; AVX512VL-64-NEXT: vcvttss2usi (%rdi), %rax
+; AVX512VL-64-NEXT: vmovq %rax, %xmm1
+; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-64-NEXT: retq
+;
+; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512DQ-32: # %bb.0:
+; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
+; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-32-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-32-NEXT: vzeroupper
+; AVX512DQ-32-NEXT: retl
+;
+; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512DQ-64: # %bb.0:
+; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-64-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-64-NEXT: vzeroupper
+; AVX512DQ-64-NEXT: retq
+;
+; AVX512VLDQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512VLDQ-32: # %bb.0:
+; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512VLDQ-32-NEXT: vcvttps2uqq (%eax), %xmm0
+; AVX512VLDQ-32-NEXT: retl
+;
+; AVX512VLDQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512VLDQ-64: # %bb.0:
+; AVX512VLDQ-64-NEXT: vcvttps2uqq (%rdi), %xmm0
+; AVX512VLDQ-64-NEXT: retq
+ %a = load <4 x float>, <4 x float>* %x ; full <4 x float> load; the avx512dq,avx512vl runs above expect it folded into the memory operand of vcvttps2uqq
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> ; keep only elements 0 and 1 of %a
+ %c = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0 ; constrained unsigned conversion to <2 x i64>; attribute group #0 is defined outside this view
+ ret <2 x i64> %c
+}
+
define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; SSE-32: # %bb.0:
; SSE-32-NEXT: comisd %xmm3, %xmm0
; SSE-32-NEXT: xorpd %xmm2, %xmm2
; SSE-32-NEXT: xorpd %xmm1, %xmm1
-; SSE-32-NEXT: jb .LBB5_2
+; SSE-32-NEXT: jb .LBB7_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movapd %xmm3, %xmm1
-; SSE-32-NEXT: .LBB5_2:
+; SSE-32-NEXT: .LBB7_2:
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-32-NEXT: comisd %xmm3, %xmm0
-; SSE-32-NEXT: jb .LBB5_4
+; SSE-32-NEXT: jb .LBB7_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movapd %xmm3, %xmm2
-; SSE-32-NEXT: .LBB5_4:
+; SSE-32-NEXT: .LBB7_4:
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: comiss %xmm3, %xmm0
; SSE-32-NEXT: xorps %xmm2, %xmm2
; SSE-32-NEXT: xorps %xmm1, %xmm1
-; SSE-32-NEXT: jb .LBB7_2
+; SSE-32-NEXT: jb .LBB9_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm3, %xmm1
-; SSE-32-NEXT: .LBB7_2:
+; SSE-32-NEXT: .LBB9_2:
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: comiss %xmm3, %xmm0
-; SSE-32-NEXT: jb .LBB7_4
+; SSE-32-NEXT: jb .LBB9_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movaps %xmm3, %xmm2
-; SSE-32-NEXT: .LBB7_4:
+; SSE-32-NEXT: .LBB9_4:
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: xorpd %xmm1, %xmm1
; SSE-32-NEXT: xorpd %xmm3, %xmm3
-; SSE-32-NEXT: jb .LBB17_2
+; SSE-32-NEXT: jb .LBB19_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movapd %xmm2, %xmm3
-; SSE-32-NEXT: .LBB17_2:
+; SSE-32-NEXT: .LBB19_2:
; SSE-32-NEXT: movapd %xmm0, %xmm4
; SSE-32-NEXT: subsd %xmm3, %xmm4
; SSE-32-NEXT: movsd %xmm4, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-32-NEXT: comisd %xmm2, %xmm0
-; SSE-32-NEXT: jb .LBB17_4
+; SSE-32-NEXT: jb .LBB19_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movapd %xmm2, %xmm1
-; SSE-32-NEXT: .LBB17_4:
+; SSE-32-NEXT: .LBB19_4:
; SSE-32-NEXT: subsd %xmm1, %xmm0
; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
; SSE-64-NEXT: comisd %xmm3, %xmm0
; SSE-64-NEXT: xorpd %xmm2, %xmm2
; SSE-64-NEXT: xorpd %xmm1, %xmm1
-; SSE-64-NEXT: jb .LBB17_2
+; SSE-64-NEXT: jb .LBB19_2
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: movapd %xmm3, %xmm1
-; SSE-64-NEXT: .LBB17_2:
+; SSE-64-NEXT: .LBB19_2:
; SSE-64-NEXT: movapd %xmm0, %xmm4
; SSE-64-NEXT: subsd %xmm1, %xmm4
; SSE-64-NEXT: cvttsd2si %xmm4, %rax
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-64-NEXT: comisd %xmm3, %xmm0
-; SSE-64-NEXT: jb .LBB17_4
+; SSE-64-NEXT: jb .LBB19_4
; SSE-64-NEXT: # %bb.3:
; SSE-64-NEXT: movapd %xmm3, %xmm2
-; SSE-64-NEXT: .LBB17_4:
+; SSE-64-NEXT: .LBB19_4:
; SSE-64-NEXT: subsd %xmm2, %xmm0
; SSE-64-NEXT: cvttsd2si %xmm0, %rax
; SSE-64-NEXT: setae %cl
; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
-; AVX-32-NEXT: jb .LBB17_2
+; AVX-32-NEXT: jb .LBB19_2
; AVX-32-NEXT: # %bb.1:
; AVX-32-NEXT: vmovapd %xmm1, %xmm4
-; AVX-32-NEXT: .LBB17_2:
+; AVX-32-NEXT: .LBB19_2:
; AVX-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovsd %xmm3, (%esp)
; AVX-32-NEXT: fldl (%esp)
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vcomisd %xmm1, %xmm0
-; AVX-32-NEXT: jb .LBB17_4
+; AVX-32-NEXT: jb .LBB19_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
-; AVX-32-NEXT: .LBB17_4:
+; AVX-32-NEXT: .LBB19_4:
; AVX-32-NEXT: vsubsd %xmm2, %xmm0, %xmm0
; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
-; AVX-64-NEXT: jb .LBB17_2
+; AVX-64-NEXT: jb .LBB19_2
; AVX-64-NEXT: # %bb.1:
; AVX-64-NEXT: vmovapd %xmm1, %xmm3
-; AVX-64-NEXT: .LBB17_2:
+; AVX-64-NEXT: .LBB19_2:
; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
-; AVX-64-NEXT: jb .LBB17_4
+; AVX-64-NEXT: jb .LBB19_4
; AVX-64-NEXT: # %bb.3:
; AVX-64-NEXT: vmovapd %xmm1, %xmm2
-; AVX-64-NEXT: .LBB17_4:
+; AVX-64-NEXT: .LBB19_4:
; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX-64-NEXT: setae %cl
; SSE-32-NEXT: comiss %xmm2, %xmm0
; SSE-32-NEXT: xorps %xmm1, %xmm1
; SSE-32-NEXT: xorps %xmm3, %xmm3
-; SSE-32-NEXT: jb .LBB19_2
+; SSE-32-NEXT: jb .LBB21_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm2, %xmm3
-; SSE-32-NEXT: .LBB19_2:
+; SSE-32-NEXT: .LBB21_2:
; SSE-32-NEXT: movaps %xmm0, %xmm4
; SSE-32-NEXT: subss %xmm3, %xmm4
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: comiss %xmm2, %xmm0
-; SSE-32-NEXT: jb .LBB19_4
+; SSE-32-NEXT: jb .LBB21_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movaps %xmm2, %xmm1
-; SSE-32-NEXT: .LBB19_4:
+; SSE-32-NEXT: .LBB21_4:
; SSE-32-NEXT: subss %xmm1, %xmm0
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
; SSE-64-NEXT: comiss %xmm3, %xmm0
; SSE-64-NEXT: xorps %xmm2, %xmm2
; SSE-64-NEXT: xorps %xmm1, %xmm1
-; SSE-64-NEXT: jb .LBB19_2
+; SSE-64-NEXT: jb .LBB21_2
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: movaps %xmm3, %xmm1
-; SSE-64-NEXT: .LBB19_2:
+; SSE-64-NEXT: .LBB21_2:
; SSE-64-NEXT: movaps %xmm0, %xmm4
; SSE-64-NEXT: subss %xmm1, %xmm4
; SSE-64-NEXT: cvttss2si %xmm4, %rax
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-64-NEXT: comiss %xmm3, %xmm0
-; SSE-64-NEXT: jb .LBB19_4
+; SSE-64-NEXT: jb .LBB21_4
; SSE-64-NEXT: # %bb.3:
; SSE-64-NEXT: movaps %xmm3, %xmm2
-; SSE-64-NEXT: .LBB19_4:
+; SSE-64-NEXT: .LBB21_4:
; SSE-64-NEXT: subss %xmm2, %xmm0
; SSE-64-NEXT: cvttss2si %xmm0, %rax
; SSE-64-NEXT: setae %cl
; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX-32-NEXT: jb .LBB19_2
+; AVX-32-NEXT: jb .LBB21_2
; AVX-32-NEXT: # %bb.1:
; AVX-32-NEXT: vmovaps %xmm1, %xmm4
-; AVX-32-NEXT: .LBB19_2:
+; AVX-32-NEXT: .LBB21_2:
; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vcomiss %xmm1, %xmm0
-; AVX-32-NEXT: jb .LBB19_4
+; AVX-32-NEXT: jb .LBB21_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
-; AVX-32-NEXT: .LBB19_4:
+; AVX-32-NEXT: .LBB21_4:
; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-32-NEXT: vmovss %xmm0, (%esp)
; AVX-32-NEXT: flds (%esp)
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-64-NEXT: jb .LBB19_2
+; AVX-64-NEXT: jb .LBB21_2
; AVX-64-NEXT: # %bb.1:
; AVX-64-NEXT: vmovaps %xmm1, %xmm3
-; AVX-64-NEXT: .LBB19_2:
+; AVX-64-NEXT: .LBB21_2:
; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
; AVX-64-NEXT: vcvttss2si %xmm3, %rax
; AVX-64-NEXT: setae %cl
; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
-; AVX-64-NEXT: jb .LBB19_4
+; AVX-64-NEXT: jb .LBB21_4
; AVX-64-NEXT: # %bb.3:
; AVX-64-NEXT: vmovaps %xmm1, %xmm2
-; AVX-64-NEXT: .LBB19_4:
+; AVX-64-NEXT: .LBB21_4:
; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
; AVX-64-NEXT: setae %cl