; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64
; Strict-FP (constrained-semantics) conversion intrinsics exercised by the
; functions below; the "metadata" operand carries the fpexcept behavior
; (all call sites in this file pass !"fpexcept.strict").
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT: retq
;
+; AVX512F-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $16, %esp
+; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vmovhps %xmm0, (%esp)
+; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fldl (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i64> %ret
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
+; AVX512F-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $16, %esp
+; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX512F-32-NEXT: xorl %eax, %eax
+; AVX512F-32-NEXT: vcomisd %xmm2, %xmm1
+; AVX512F-32-NEXT: setb %cl
+; AVX512F-32-NEXT: kmovw %ecx, %k1
+; AVX512F-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX512F-32-NEXT: vmovapd %xmm2, %xmm4
+; AVX512F-32-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubsd %xmm4, %xmm1, %xmm1
+; AVX512F-32-NEXT: vmovsd %xmm1, (%esp)
+; AVX512F-32-NEXT: fldl (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: setae %al
+; AVX512F-32-NEXT: shll $31, %eax
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: xorl %ecx, %ecx
+; AVX512F-32-NEXT: vcomisd %xmm2, %xmm0
+; AVX512F-32-NEXT: setb %dl
+; AVX512F-32-NEXT: kmovw %edx, %k1
+; AVX512F-32-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512F-32-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: setae %cl
+; AVX512F-32-NEXT: shll $31, %ecx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i64> %ret
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT: retq
;
+; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $16, %esp
+; AVX512F-32-NEXT: vmovd %xmm0, (%esp)
+; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512DQ-64-NEXT: vmovq %rax, %xmm0
; AVX512DQ-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512DQ-64-NEXT: retq
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i64> %ret
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
+; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $16, %esp
+; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: xorl %eax, %eax
+; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1
+; AVX512F-32-NEXT: setb %cl
+; AVX512F-32-NEXT: kmovw %ecx, %k1
+; AVX512F-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX512F-32-NEXT: vmovaps %xmm2, %xmm4
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
+; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: setae %al
+; AVX512F-32-NEXT: shll $31, %eax
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: xorl %ecx, %ecx
+; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0
+; AVX512F-32-NEXT: setb %dl
+; AVX512F-32-NEXT: kmovw %edx, %k1
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX512F-32-NEXT: vmovss %xmm0, (%esp)
+; AVX512F-32-NEXT: flds (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: setae %cl
+; AVX512F-32-NEXT: shll $31, %ecx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512DQ-64-NEXT: vmovq %rax, %xmm0
; AVX512DQ-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512DQ-64-NEXT: retq
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i64> %ret
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i32> %ret
; AVX-64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX512F-NEXT: vcvttsd2usi %xmm1, %eax
+; AVX512F-NEXT: vcvttsd2usi %xmm0, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i32> %ret
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2si %xmm1, %eax
+; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
+; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
+; AVX512VLDQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i32> %ret
; AVX-64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2usi %xmm1, %eax
+; AVX512F-NEXT: vcvttss2usi %xmm0, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512VLDQ-NEXT: vcvttss2usi %xmm1, %eax
+; AVX512VLDQ-NEXT: vcvttss2usi %xmm0, %ecx
+; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
+; AVX512VLDQ-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i32> %ret
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i16> %ret
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i16> %ret
; AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2si %xmm1, %eax
+; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
+; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
+; AVX512VLDQ-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i16> %ret
; AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2si %xmm1, %eax
+; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
+; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
+; AVX512VLDQ-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i16> %ret
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i8> %ret
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i8> %ret
; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2si %xmm1, %eax
+; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
+; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
+; AVX512VLDQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i8> %ret
; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2si %xmm1, %eax
+; AVX512F-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512F-NEXT: vmovd %ecx, %xmm0
+; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQ-NEXT: vmovd %ecx, %xmm0
; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
+; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %ecx
+; AVX512VLDQ-NEXT: vmovd %ecx, %xmm0
+; AVX512VLDQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i8> %ret
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
+; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i1> %ret
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovaps %xmm0, %xmm0
+; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
+; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
+; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double> %a,
metadata !"fpexcept.strict")
ret <2 x i1> %ret
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttss2si %xmm0, %eax
+; AVX512F-NEXT: andl $1, %eax
+; AVX512F-NEXT: kmovw %eax, %k0
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2si %xmm0, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: kshiftlw $1, %k1, %k1
+; AVX512F-NEXT: korw %k1, %k0, %k1
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttss2si %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
+; AVX512VLDQ-NEXT: kmovw %eax, %k0
+; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0
+; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax
+; AVX512VLDQ-NEXT: kmovw %eax, %k1
+; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1
+; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1
+; AVX512VLDQ-NEXT: korw %k0, %k1, %k0
+; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i1> %ret
; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttss2si %xmm0, %eax
+; AVX512F-NEXT: andl $1, %eax
+; AVX512F-NEXT: kmovw %eax, %k0
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvttss2si %xmm0, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: kshiftlw $1, %k1, %k1
+; AVX512F-NEXT: korw %k1, %k0, %k1
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttss2si %xmm0, %eax
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax
+; AVX512VLDQ-NEXT: kmovw %eax, %k0
+; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0
+; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax
+; AVX512VLDQ-NEXT: kmovw %eax, %k1
+; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1
+; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1
+; AVX512VLDQ-NEXT: korw %k0, %k1, %k0
+; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float> %a,
metadata !"fpexcept.strict")
ret <2 x i1> %ret
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i32> %ret
; AVX-64-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-64-NEXT: retq
;
+; FIXME: This is unsafe behavior for strict FP: the whole-vector vcvttps2udq may raise spurious FP exceptions on the undefined upper elements.
+; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i32> %ret
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i8> %ret
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i8> %ret
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
+; AVX512VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i1> %ret
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
+; AVX512VLDQ: # %bb.0:
+; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0
+; AVX512VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i1> %ret
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -disable-strictnode-mutation < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX-32
; RUN: llc -disable-strictnode-mutation < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-64
declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
+; AVX512F-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $32, %esp
+; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512F-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vmovhps %xmm0, (%esp)
+; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fldl (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
+; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm2
+; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX512F-64-NEXT: vcvttsd2si %xmm1, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm2
+; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512F-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttpd2qq %ymm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i64> %ret
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
+; AVX512F-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: pushl %ebx
+; AVX512F-32-NEXT: pushl %esi
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $32, %esp
+; AVX512F-32-NEXT: .cfi_offset %esi, -16
+; AVX512F-32-NEXT: .cfi_offset %ebx, -12
+; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX512F-32-NEXT: vcomisd %xmm1, %xmm2
+; AVX512F-32-NEXT: setb %cl
+; AVX512F-32-NEXT: kmovw %ecx, %k1
+; AVX512F-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX512F-32-NEXT: vmovapd %xmm1, %xmm4
+; AVX512F-32-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubsd %xmm4, %xmm2, %xmm2
+; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: movl $0, %eax
+; AVX512F-32-NEXT: setae %al
+; AVX512F-32-NEXT: shll $31, %eax
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: movl %eax, %esi
+; AVX512F-32-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
+; AVX512F-32-NEXT: xorl %ecx, %ecx
+; AVX512F-32-NEXT: vcomisd %xmm1, %xmm4
+; AVX512F-32-NEXT: setb %dl
+; AVX512F-32-NEXT: kmovw %edx, %k1
+; AVX512F-32-NEXT: vmovapd %xmm1, %xmm5
+; AVX512F-32-NEXT: vmovsd %xmm3, %xmm5, %xmm5 {%k1}
+; AVX512F-32-NEXT: vsubsd %xmm5, %xmm4, %xmm4
+; AVX512F-32-NEXT: vmovsd %xmm4, (%esp)
+; AVX512F-32-NEXT: fldl (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: setae %cl
+; AVX512F-32-NEXT: shll $31, %ecx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: xorl %edx, %edx
+; AVX512F-32-NEXT: vcomisd %xmm1, %xmm2
+; AVX512F-32-NEXT: setb %bl
+; AVX512F-32-NEXT: kmovw %ebx, %k1
+; AVX512F-32-NEXT: vmovapd %xmm1, %xmm4
+; AVX512F-32-NEXT: vmovsd %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubsd %xmm4, %xmm2, %xmm2
+; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: setae %dl
+; AVX512F-32-NEXT: shll $31, %edx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: xorl %ebx, %ebx
+; AVX512F-32-NEXT: vcomisd %xmm1, %xmm0
+; AVX512F-32-NEXT: setb %al
+; AVX512F-32-NEXT: kmovw %eax, %k1
+; AVX512F-32-NEXT: vmovsd %xmm3, %xmm1, %xmm1 {%k1}
+; AVX512F-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0
+; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; AVX512F-32-NEXT: setae %bl
+; AVX512F-32-NEXT: shll $31, %ebx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX512F-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
+; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512F-32-NEXT: leal -8(%ebp), %esp
+; AVX512F-32-NEXT: popl %esi
+; AVX512F-32-NEXT: popl %ebx
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm2
+; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX512F-64-NEXT: vcvttsd2usi %xmm1, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm2
+; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i64> %ret
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
+; AVX512F-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $32, %esp
+; AVX512F-32-NEXT: vmovd %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vextractps $3, %xmm0, (%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1
+; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; AVX512F-64-NEXT: vcvttss2si %xmm1, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512F-64-NEXT: vcvttss2si %xmm2, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm2
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm2
+; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
+; FIXME: This is unsafe behavior for strict FP: the conversion is done on a widened vector, so the undefined extra elements may raise spurious FP exceptions.
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2qq %xmm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i64> %ret
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
+; AVX512F-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: pushl %ebx
+; AVX512F-32-NEXT: pushl %esi
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $32, %esp
+; AVX512F-32-NEXT: .cfi_offset %esi, -16
+; AVX512F-32-NEXT: .cfi_offset %ebx, -12
+; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
+; AVX512F-32-NEXT: setb %cl
+; AVX512F-32-NEXT: kmovw %ecx, %k1
+; AVX512F-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX512F-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm4, %xmm2, %xmm2
+; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: movl $0, %eax
+; AVX512F-32-NEXT: setae %al
+; AVX512F-32-NEXT: shll $31, %eax
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: movl %eax, %esi
+; AVX512F-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX512F-32-NEXT: xorl %ecx, %ecx
+; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
+; AVX512F-32-NEXT: setb %dl
+; AVX512F-32-NEXT: kmovw %edx, %k1
+; AVX512F-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm4, %xmm2, %xmm2
+; AVX512F-32-NEXT: vmovss %xmm2, (%esp)
+; AVX512F-32-NEXT: flds (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: setae %cl
+; AVX512F-32-NEXT: shll $31, %ecx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512F-32-NEXT: xorl %edx, %edx
+; AVX512F-32-NEXT: vcomiss %xmm1, %xmm2
+; AVX512F-32-NEXT: setb %bl
+; AVX512F-32-NEXT: kmovw %ebx, %k1
+; AVX512F-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm4, %xmm2, %xmm2
+; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: setae %dl
+; AVX512F-32-NEXT: shll $31, %edx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: xorl %ebx, %ebx
+; AVX512F-32-NEXT: vcomiss %xmm1, %xmm0
+; AVX512F-32-NEXT: setb %al
+; AVX512F-32-NEXT: kmovw %eax, %k1
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm1, %xmm1 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX512F-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; AVX512F-32-NEXT: setae %bl
+; AVX512F-32-NEXT: shll $31, %ebx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, %ebx, %xmm1, %xmm1
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; AVX512F-32-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1
+; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512F-32-NEXT: leal -8(%ebp), %esp
+; AVX512F-32-NEXT: popl %esi
+; AVX512F-32-NEXT: popl %ebx
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; AVX512F-64-NEXT: vcvttss2usi %xmm1, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX512F-64-NEXT: vcvttss2usi %xmm2, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm2
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm2
+; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX512F-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: pushl %ebp
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
+; FIXME: This is unsafe behavior for strict FP: the conversion is done on a widened vector, so the undefined extra elements may raise spurious FP exceptions.
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2uqq %xmm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %a,
metadata !"fpexcept.strict")
ret <4 x i64> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; FIXME: This is unsafe behavior for strict FP: the conversion is done on a widened vector, so the undefined extra elements may raise spurious FP exceptions.
+; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttpd2udq %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i32> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptosi_v4f64_to_v4i1:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0
+; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0
+; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i1> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttpd2dq %ymm0, %xmm0
+; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptoui_v4f64_to_v4i1:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttpd2dq %ymm0, %xmm0
+; AVX512DQVL-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512DQVL-NEXT: vpmovd2m %xmm0, %k0
+; AVX512DQVL-NEXT: vpmovm2d %k0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f64(<4 x double> %a,
metadata !"fpexcept.strict")
ret <4 x i1> %ret
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
+; FIXME: This is unsafe behavior for strict FP: the conversion is done on a widened vector, so the undefined extra elements may raise spurious FP exceptions.
+; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2udq %ymm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i32> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i16> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i16:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i16> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i8:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmovdb %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i8> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i8:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmovdb %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i8> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptosi_v8f32_to_v8i1:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0
+; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0
+; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i1> %ret
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
+; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: strict_vector_fptoui_v8f32_to_v8i1:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpslld $31, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpmovd2m %ymm0, %k0
+; AVX512DQVL-NEXT: vpmovm2d %k0, %ymm0
+; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512DQVL-NEXT: vzeroupper
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float> %a,
metadata !"fpexcept.strict")
ret <8 x i1> %ret
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE,SSE-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX1,AVX-64,AVX1-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-32,AVX512VL-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-64,AVX512VL-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-32,AVX512F-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-64,AVX512F-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-32,AVX512VL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-64,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-64
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
; AVX1-64-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
+; AVX512F-LABEL: uitofp_v4i1_v4f32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f32:
+; AVX512DQVL-32: # %bb.0:
+; AVX512DQVL-32-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
+; AVX512DQVL-32-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX512DQVL-32-NEXT: retl
+;
+; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f32:
+; AVX512DQVL-64: # %bb.0:
+; AVX512DQVL-64-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512DQVL-64-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX512DQVL-64-NEXT: retq
%result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX1-64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-64-NEXT: retq
;
+; FIXME: This is an unsafe behavior for strict FP
+; AVX512F-LABEL: uitofp_v4i32_v4f32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: uitofp_v4i32_v4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
+; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i32_v4f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v4i32_v4f32:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtudq2ps %xmm0, %xmm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX1-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
+; AVX512F-LABEL: uitofp_v2i1_v2f64:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-32-LABEL: uitofp_v2i1_v2f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-32-LABEL: uitofp_v2i1_v2f64:
+; AVX512DQVL-32: # %bb.0:
+; AVX512DQVL-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512DQVL-32-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
+; AVX512DQVL-32-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX512DQVL-32-NEXT: retl
+;
+; AVX512DQVL-64-LABEL: uitofp_v2i1_v2f64:
+; AVX512DQVL-64: # %bb.0:
+; AVX512DQVL-64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512DQVL-64-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512DQVL-64-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX512DQVL-64-NEXT: retq
%result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX1-64-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
+; FIXME: This is an unsafe behavior for strict FP
+; AVX512F-LABEL: uitofp_v2i32_v2f64:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: uitofp_v2i32_v2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
+; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v2i32_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v2i32_v2f64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtudq2pd %xmm0, %xmm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT: retq
;
+; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: sitofp_v2i64_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: sitofp_v2i64_v2f64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtqq2pd %xmm0, %xmm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX1-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-64-NEXT: retq
;
+; AVX512F-32-LABEL: uitofp_v2i64_v2f64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-32-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512F-32-NEXT: vpor {{\.LCPI.*}}, %xmm1, %xmm1
+; AVX512F-32-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX512F-32-NEXT: vpor {{\.LCPI.*}}, %xmm0, %xmm0
+; AVX512F-32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
+; AVX512F-32-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: uitofp_v2i64_v2f64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-64-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512F-64-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
+; AVX512F-64-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX512F-64-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-64-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: uitofp_v2i64_v2f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm1
; AVX512VL-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX512VL-64-NEXT: retq
;
+; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v2i64_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v2i64_v2f64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtuqq2pd %xmm0, %xmm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-64,AVX1-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-32,AVX512VL-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-64,AVX512VL-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX-32,AVX512F-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX-64,AVX512F-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-32,AVX512VL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-64,AVX512VL-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-64
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
; AVX512DQ-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-64-NEXT: retq
+;
+; AVX512DQVL-32-LABEL: uitofp_v8i1_v8f32:
+; AVX512DQVL-32: # %bb.0:
+; AVX512DQVL-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; AVX512DQVL-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512DQVL-32-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX512DQVL-32-NEXT: retl
+;
+; AVX512DQVL-64-LABEL: uitofp_v8i1_v8f32:
+; AVX512DQVL-64: # %bb.0:
+; AVX512DQVL-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQVL-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512DQVL-64-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX512DQVL-64-NEXT: retq
%result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
+; FIXME: This is an unsafe behavior for strict FP
+; AVX512F-LABEL: uitofp_v8i32_v8f32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: uitofp_v8i32_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
+; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v8i32_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v8i32_v8f32:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtudq2ps %ymm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
+; AVX512F-LABEL: uitofp_v4i1_v4f64:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f64:
+; AVX512DQVL-32: # %bb.0:
+; AVX512DQVL-32-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
+; AVX512DQVL-32-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX512DQVL-32-NEXT: retl
+;
+; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f64:
+; AVX512DQVL-64: # %bb.0:
+; AVX512DQVL-64-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512DQVL-64-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX512DQVL-64-NEXT: retq
%result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
+; FIXME: This is an unsafe behavior for strict FP
+; AVX512F-LABEL: uitofp_v4i32_v4f64:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512F-NEXT: ret{{[l|q]}}
+;
; AVX512VL-LABEL: uitofp_v4i32_v4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
+; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i32_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v4i32_v4f64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtudq2pd %xmm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
+; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: sitofp_v4i64_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: sitofp_v4i64_v4f64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtqq2pd %ymm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
; AVX1-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX1-64-NEXT: retq
;
+; AVX512F-32-LABEL: uitofp_v4i64_v4f64:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: vpsrlq $32, %ymm0, %ymm1
+; AVX512F-32-NEXT: vpor {{\.LCPI.*}}, %ymm1, %ymm1
+; AVX512F-32-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
+; AVX512F-32-NEXT: vsubpd %ymm2, %ymm1, %ymm1
+; AVX512F-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX512F-32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
+; AVX512F-32-NEXT: vpor {{\.LCPI.*}}, %ymm0, %ymm0
+; AVX512F-32-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: uitofp_v4i64_v4f64:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-64-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
+; AVX512F-64-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX512F-64-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
+; AVX512F-64-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX512F-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
+; AVX512F-64-NEXT: vsubpd %ymm2, %ymm0, %ymm0
+; AVX512F-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX512F-64-NEXT: retq
+;
; AVX512VL-32-LABEL: uitofp_v4i64_v4f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm1
; AVX512VL-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX512VL-64-NEXT: retq
;
+; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: uitofp_v4i64_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v4i64_v4f64:
+; AVX512DQVL: # %bb.0:
+; AVX512DQVL-NEXT: vcvtuqq2pd %ymm0, %ymm0
+; AVX512DQVL-NEXT: ret{{[l|q]}}
%result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0