From e0cb1cf7e3689bdd48ef996da1780819ebc60bc0 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 11 Oct 2019 07:24:36 +0000
Subject: [PATCH] [X86] Add v8i64->v8i8 ssat/usat/packus truncate tests to
 min-legal-vector-width.ll

I wonder if we should split the v8i8 stores in order to form two
v4i8 saturating truncating stores. This would remove the unpckl
needed to concatenate the v4i8 results to make a single store.

llvm-svn: 374519
---
 llvm/test/CodeGen/X86/min-legal-vector-width.ll | 103 ++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index eb90a2a..49dae99 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1142,3 +1142,106 @@ define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32>* %p) "min-legal-vector-wi
   ret <32 x i8> %f
 }
 
+define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_packus_v8i64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpmaxsq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpmovusqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmaxsq %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %3 = icmp sgt <8 x i64> %2, zeroinitializer
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+  %5 = trunc <8 x i64> %4 to <8 x i8>
+  ret <8 x i8> %5
+}
+
+define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_packus_v8i64_v8i8_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpmaxsq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpmovusqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmaxsq %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vmovq %xmm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp slt <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %3 = icmp sgt <8 x i64> %2, zeroinitializer
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+  %5 = trunc <8 x i64> %4 to <8 x i8>
+  store <8 x i8> %5, <8 x i8> *%p1
+  ret void
+}
+
+define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_ssat_v8i64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovsqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmovsqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
+  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
+  %5 = trunc <8 x i64> %4 to <8 x i8>
+  ret <8 x i8> %5
+}
+
+define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_ssat_v8i64_v8i8_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovsqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmovsqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vmovq %xmm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
+  %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
+  %5 = trunc <8 x i64> %4 to <8 x i8>
+  store <8 x i8> %5, <8 x i8> *%p1
+  ret void
+}
+
+define <8 x i8> @trunc_usat_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_usat_v8i64_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovusqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp ult <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %3 = trunc <8 x i64> %2 to <8 x i8>
+  ret <8 x i8> %3
+}
+
+define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" {
+; CHECK-LABEL: trunc_usat_v8i64_v8i8_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovusqb %ymm1, %xmm1
+; CHECK-NEXT:    vpmovusqb %ymm0, %xmm0
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    vmovq %xmm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = icmp ult <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
+  %3 = trunc <8 x i64> %2 to <8 x i8>
+  store <8 x i8> %3, <8 x i8> *%p1
+  ret void
+}
--
2.7.4
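
For illustration of the split-store idea in the commit message, a sketch of what
trunc_usat_v8i64_v8i8_store could lower to if each <4 x i64> half were saturated
and truncated straight to memory via the m32-destination form of vpmovusqb
(hypothetical output, not what the compiler currently generates):

    vpmovusqb %ymm0, (%rdi)     # unsigned-saturate low 4 qwords to bytes, store to p1[0..3]
    vpmovusqb %ymm1, 4(%rdi)    # unsigned-saturate high 4 qwords to bytes, store to p1[4..7]
    vzeroupper
    retq

This form would need neither the vpunpckldq to concatenate the two v4i8 results
nor the vmovq to store them.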