define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
; GENERIC-LABEL: test_cvtpd2dq:
; GENERIC: # BB#0:
+; GENERIC-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
+; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT: retq # sched: [1:1.00]
+;
+; SANDY-LABEL: test_cvtpd2dq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
+; SANDY-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
+; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_cvtpd2dq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
+; HASWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [7:1.00]
+; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
+;
+; BROADWELL-LABEL: test_cvtpd2dq:
+; BROADWELL: # BB#0:
+; BROADWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
+; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_cvtpd2dq:
+; SKYLAKE: # BB#0:
+; SKYLAKE-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
+; SKYLAKE-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
+; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKYLAKE-NEXT: retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_cvtpd2dq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
+; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
+; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
+; SKX-NEXT: retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_cvtpd2dq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_cvtpd2dq:
+; ZNVER1: # BB#0:
+; ZNVER1-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
+; ZNVER1-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [5:1.00]
+; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
+ %1 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
+ %2 = load <4 x double>, <4 x double> *%a1, align 32
+ %3 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %2)
+ %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %4
+}
+declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
+
+define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) {
+; GENERIC-LABEL: test_cvttpd2dq:
+; GENERIC: # BB#0:
; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; SANDY-LABEL: test_cvtpd2dq:
+; SANDY-LABEL: test_cvttpd2dq:
; SANDY: # BB#0:
; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
-; HASWELL-LABEL: test_cvtpd2dq:
+; HASWELL-LABEL: test_cvttpd2dq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00]
; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
-; BROADWELL-LABEL: test_cvtpd2dq:
+; BROADWELL-LABEL: test_cvttpd2dq:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
-; SKYLAKE-LABEL: test_cvtpd2dq:
+; SKYLAKE-LABEL: test_cvttpd2dq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
-; SKX-LABEL: test_cvtpd2dq:
+; SKX-LABEL: test_cvttpd2dq:
; SKX: # BB#0:
; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
-; BTVER2-LABEL: test_cvtpd2dq:
+; BTVER2-LABEL: test_cvttpd2dq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
-; ZNVER1-LABEL: test_cvtpd2dq:
+; ZNVER1-LABEL: test_cvttpd2dq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [5:1.00]
define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
; GENERIC-LABEL: test_cvtps2dq:
; GENERIC: # BB#0:
+; GENERIC-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
+; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT: retq # sched: [1:1.00]
+;
+; SANDY-LABEL: test_cvtps2dq:
+; SANDY: # BB#0:
+; SANDY-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
+; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
+;
+; HASWELL-LABEL: test_cvtps2dq:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; HASWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [3:1.00]
+; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; HASWELL-NEXT: retq # sched: [2:1.00]
+;
+; BROADWELL-LABEL: test_cvtps2dq:
+; BROADWELL: # BB#0:
+; BROADWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; BROADWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [9:1.00]
+; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
+;
+; SKYLAKE-LABEL: test_cvtps2dq:
+; SKYLAKE: # BB#0:
+; SKYLAKE-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33]
+; SKYLAKE-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
+; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; SKYLAKE-NEXT: retq # sched: [7:1.00]
+;
+; SKX-LABEL: test_cvtps2dq:
+; SKX: # BB#0:
+; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
+; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; SKX-NEXT: retq # sched: [7:1.00]
+;
+; BTVER2-LABEL: test_cvtps2dq:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:1.00]
+; BTVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+;
+; ZNVER1-LABEL: test_cvtps2dq:
+; ZNVER1: # BB#0:
+; ZNVER1-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [12:1.00]
+; ZNVER1-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [5:1.00]
+; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
+ %1 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
+ %2 = load <8 x float>, <8 x float> *%a1, align 32
+ %3 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %2)
+ %4 = or <8 x i32> %1, %3
+ ret <8 x i32> %4
+}
+declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
+
+define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) {
+; GENERIC-LABEL: test_cvttps2dq:
+; GENERIC: # BB#0:
; GENERIC-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
-; SANDY-LABEL: test_cvtps2dq:
+; SANDY-LABEL: test_cvttps2dq:
; SANDY: # BB#0:
; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
-; HASWELL-LABEL: test_cvtps2dq:
+; HASWELL-LABEL: test_cvttps2dq:
; HASWELL: # BB#0:
; HASWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [3:1.00]
; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [2:1.00]
;
-; BROADWELL-LABEL: test_cvtps2dq:
+; BROADWELL-LABEL: test_cvttps2dq:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:1.00]
; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
-; SKYLAKE-LABEL: test_cvtps2dq:
+; SKYLAKE-LABEL: test_cvttps2dq:
; SKYLAKE: # BB#0:
; SKYLAKE-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33]
; SKYLAKE-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
-; SKX-LABEL: test_cvtps2dq:
+; SKX-LABEL: test_cvttps2dq:
; SKX: # BB#0:
; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
-; BTVER2-LABEL: test_cvtps2dq:
+; BTVER2-LABEL: test_cvttps2dq:
; BTVER2: # BB#0:
; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:1.00]
; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
-; ZNVER1-LABEL: test_cvtps2dq:
+; ZNVER1-LABEL: test_cvttps2dq:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [12:1.00]
; ZNVER1-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [5:1.00]