From fadf8b8dec9e054c321e3855b867e13c4aeb0a67 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 15 May 2018 04:26:27 +0000 Subject: [PATCH] [X86] Add fast isel tests for some of the avx512 truncate intrinsics to match current clang codegen. llvm-svn: 332326 --- .../CodeGen/X86/avx512-intrinsics-fast-isel.ll | 185 +++++++++++++++++++++ .../CodeGen/X86/avx512bw-intrinsics-fast-isel.ll | 60 +++++++ .../CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll | 74 +++++++++ .../CodeGen/X86/avx512vl-intrinsics-fast-isel.ll | 132 +++++++++++++++ 4 files changed, 451 insertions(+) diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index 5968100..0cae0e9 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -2169,5 +2169,190 @@ entry: ret <8 x double> %1 } +define <2 x i64> @test_mm512_cvtepi32_epi8(<8 x i64> %__A) { +; X32-LABEL: test_mm512_cvtepi32_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: vpmovdb %zmm0, %xmm0 +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_cvtepi32_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: vpmovdb %zmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = bitcast <8 x i64> %__A to <16 x i32> + %conv.i = trunc <16 x i32> %0 to <16 x i8> + %1 = bitcast <16 x i8> %conv.i to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @test_mm512_mask_cvtepi32_epi8(<2 x i64> %__O, i16 zeroext %__M, <8 x i64> %__A) { +; X32-LABEL: test_mm512_mask_cvtepi32_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vpmovdb %zmm1, %xmm0 {%k1} +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_mask_cvtepi32_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpmovdb %zmm1, %xmm0 {%k1} +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = bitcast <8 x i64> %__A to <16 x i32> + %1 = bitcast <2 x i64> %__O to <16 x i8> + %2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M) + %3 = bitcast <16 x i8> %2 to <2 x i64> + ret <2 x i64> %3 +} + +define <2 x i64> @test_mm512_maskz_cvtepi32_epi8(i16 zeroext %__M, <8 x i64> %__A) { +; X32-LABEL: test_mm512_maskz_cvtepi32_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z} +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_cvtepi32_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z} +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = bitcast <8 x i64> %__A to <16 x i32> + %1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M) + %2 = bitcast <16 x i8> %1 to <2 x i64> + ret <2 x i64> %2 +} + +define <4 x i64> @test_mm512_cvtepi64_epi32(<8 x i64> %__A) { +; X32-LABEL: test_mm512_cvtepi64_epi32: +; X32: # %bb.0: # %entry +; X32-NEXT: vpmovqd %zmm0, %ymm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_cvtepi64_epi32: +; X64: # %bb.0: # %entry +; X64-NEXT: vpmovqd %zmm0, %ymm0 +; X64-NEXT: retq +entry: + %conv.i = trunc <8 x i64> %__A to <8 x i32> + %0 = bitcast <8 x i32> %conv.i to <4 x i64> + ret <4 x i64> %0 +} + +define <4 x i64> @test_mm512_mask_cvtepi64_epi32(<4 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) { +; X32-LABEL: test_mm512_mask_cvtepi64_epi32: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vpmovqd 
%zmm1, %ymm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_mask_cvtepi64_epi32: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpmovqd %zmm1, %ymm0 {%k1} +; X64-NEXT: retq +entry: + %conv.i.i = trunc <8 x i64> %__A to <8 x i32> + %0 = bitcast <4 x i64> %__O to <8 x i32> + %1 = bitcast i8 %__M to <8 x i1> + %2 = select <8 x i1> %1, <8 x i32> %conv.i.i, <8 x i32> %0 + %3 = bitcast <8 x i32> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <4 x i64> @test_mm512_maskz_cvtepi64_epi32(i8 zeroext %__M, <8 x i64> %__A) { +; X32-LABEL: test_mm512_maskz_cvtepi64_epi32: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_cvtepi64_epi32: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} +; X64-NEXT: retq +entry: + %conv.i.i = trunc <8 x i64> %__A to <8 x i32> + %0 = bitcast i8 %__M to <8 x i1> + %1 = select <8 x i1> %0, <8 x i32> %conv.i.i, <8 x i32> zeroinitializer + %2 = bitcast <8 x i32> %1 to <4 x i64> + ret <4 x i64> %2 +} + +define <2 x i64> @test_mm512_cvtepi64_epi16(<8 x i64> %__A) { +; X32-LABEL: test_mm512_cvtepi64_epi16: +; X32: # %bb.0: # %entry +; X32-NEXT: vpmovqw %zmm0, %xmm0 +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_cvtepi64_epi16: +; X64: # %bb.0: # %entry +; X64-NEXT: vpmovqw %zmm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %conv.i = trunc <8 x i64> %__A to <8 x i16> + %0 = bitcast <8 x i16> %conv.i to <2 x i64> + ret <2 x i64> %0 +} + +define <2 x i64> @test_mm512_mask_cvtepi64_epi16(<2 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) { +; X32-LABEL: test_mm512_mask_cvtepi64_epi16: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vpmovqw %zmm1, %xmm0 {%k1} +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_mask_cvtepi64_epi16: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpmovqw %zmm1, %xmm0 {%k1} +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = bitcast <2 x i64> %__O to <8 x i16> + %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M) + %2 = bitcast <8 x i16> %1 to <2 x i64> + ret <2 x i64> %2 +} + +define <2 x i64> @test_mm512_maskz_cvtepi64_epi16(i8 zeroext %__M, <8 x i64> %__A) { +; X32-LABEL: test_mm512_maskz_cvtepi64_epi16: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z} +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_cvtepi64_epi16: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z} +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M) + %1 = bitcast <8 x i16> %0 to <2 x i64> + ret <2 x i64> %1 +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16) +declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8) + !0 = !{i32 1} diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll index a0af130..018487a 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll @@ -801,5 +801,65 @@ entry: ret i32 %4 } +define <4 x i64> 
@test_mm512_cvtepi16_epi8(<8 x i64> %__A) { +; X32-LABEL: test_mm512_cvtepi16_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: vpmovwb %zmm0, %ymm0 +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_cvtepi16_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: vpmovwb %zmm0, %ymm0 +; X64-NEXT: retq +entry: + %0 = bitcast <8 x i64> %__A to <32 x i16> + %conv.i = trunc <32 x i16> %0 to <32 x i8> + %1 = bitcast <32 x i8> %conv.i to <4 x i64> + ret <4 x i64> %1 +} + +define <4 x i64> @test_mm512_mask_cvtepi16_epi8(<4 x i64> %__O, i32 %__M, <8 x i64> %__A) { +; X32-LABEL: test_mm512_mask_cvtepi16_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vpmovwb %zmm1, %ymm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_mask_cvtepi16_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 +; X64-NEXT: vpmovwb %zmm1, %ymm0 {%k1} +; X64-NEXT: retq +entry: + %0 = bitcast <8 x i64> %__A to <32 x i16> + %conv.i.i = trunc <32 x i16> %0 to <32 x i8> + %1 = bitcast <4 x i64> %__O to <32 x i8> + %2 = bitcast i32 %__M to <32 x i1> + %3 = select <32 x i1> %2, <32 x i8> %conv.i.i, <32 x i8> %1 + %4 = bitcast <32 x i8> %3 to <4 x i64> + ret <4 x i64> %4 +} + +define <4 x i64> @test_mm512_maskz_cvtepi16_epi8(i32 %__M, <8 x i64> %__A) { +; X32-LABEL: test_mm512_maskz_cvtepi16_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; X32-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_cvtepi16_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovd %edi, %k1 +; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} +; X64-NEXT: retq +entry: + %0 = bitcast <8 x i64> %__A to <32 x i16> + %conv.i.i = trunc <32 x i16> %0 to <32 x i8> + %1 = bitcast i32 %__M to <32 x i1> + %2 = select <32 x i1> %1, <32 x i8> %conv.i.i, <32 x i8> zeroinitializer + %3 = bitcast <32 x i8> %2 to <4 x i64> + ret <4 x i64> %3 +} + !0 = !{i32 1} diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll index aac83f4..74f3f85 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll @@ -811,5 +811,79 @@ define <4 x i64> @test_mm256_maskz_broadcastw_epi16(i16 %a0, <2 x i64> %a1) { ret <4 x i64> %res2 } +define <2 x i64> @test_mm256_cvtepi16_epi8(<4 x i64> %__A) { +; X32-LABEL: test_mm256_cvtepi16_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: vpmovwb %ymm0, %xmm0 +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_cvtepi16_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: vpmovwb %ymm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__A to <16 x i16> + %conv.i = trunc <16 x i16> %0 to <16 x i8> + %1 = bitcast <16 x i8> %conv.i to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @test_mm256_mask_cvtepi16_epi8(<2 x i64> %__O, i16 zeroext %__M, <4 x i64> %__A) { +; X32-LABEL: test_mm256_mask_cvtepi16_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl %al, %eax +; X32-NEXT: kmovd %eax, %k1 +; X32-NEXT: vpmovwb %ymm1, %xmm0 {%k1} +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_mask_cvtepi16_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: kmovd %eax, %k1 +; X64-NEXT: vpmovwb %ymm1, %xmm0 {%k1} +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %conv1.i = and i16 %__M, 255 + %0 = bitcast <4 x i64> %__A to <16 x i16> + %conv.i.i = trunc <16 x i16> %0 to <16 x i8> + %1 = bitcast <2 x i64> %__O to <16 x 
i8> + %2 = bitcast i16 %conv1.i to <16 x i1> + %3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1 + %4 = bitcast <16 x i8> %3 to <2 x i64> + ret <2 x i64> %4 +} + +define <2 x i64> @test_mm256_maskz_cvtepi16_epi8(i16 zeroext %__M, <4 x i64> %__A) { +; X32-LABEL: test_mm256_maskz_cvtepi16_epi8: +; X32: # %bb.0: # %entry +; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl %al, %eax +; X32-NEXT: kmovd %eax, %k1 +; X32-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_maskz_cvtepi16_epi8: +; X64: # %bb.0: # %entry +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: kmovd %eax, %k1 +; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %conv1.i = and i16 %__M, 255 + %0 = bitcast <4 x i64> %__A to <16 x i16> + %conv.i.i = trunc <16 x i16> %0 to <16 x i8> + %1 = bitcast i16 %conv1.i to <16 x i1> + %2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer + %3 = bitcast <16 x i8> %2 to <2 x i64> + ret <2 x i64> %3 +} + !0 = !{i32 1} diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll index 2914504..2279b6e 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -3404,6 +3404,137 @@ entry: ret <2 x i64> %tmp4 } +define <2 x i64> @test_mm256_cvtepi32_epi16(<4 x i64> %__A) local_unnamed_addr #0 { +; X32-LABEL: test_mm256_cvtepi32_epi16: +; X32: # %bb.0: # %entry +; X32-NEXT: vpmovdw %ymm0, %xmm0 +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_cvtepi32_epi16: +; X64: # %bb.0: # %entry +; X64-NEXT: vpmovdw %ymm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__A to <8 x i32> + %conv.i = trunc <8 x i32> %0 to <8 x i16> + %1 = bitcast <8 x i16> %conv.i to <2 x i64> + ret <2 x i64> %1 +} + +define <2 x i64> @test_mm256_mask_cvtepi32_epi16(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) { +; X32-LABEL: test_mm256_mask_cvtepi32_epi16: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vpmovdw %ymm1, %xmm0 {%k1} +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_mask_cvtepi32_epi16: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpmovdw %ymm1, %xmm0 {%k1} +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__A to <8 x i32> + %1 = bitcast <2 x i64> %__O to <8 x i16> + %2 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %0, <8 x i16> %1, i8 %__M) + %3 = bitcast <8 x i16> %2 to <2 x i64> + ret <2 x i64> %3 +} + +define <2 x i64> @test_mm256_maskz_cvtepi32_epi16(i8 zeroext %__M, <4 x i64> %__A) { +; X32-LABEL: test_mm256_maskz_cvtepi32_epi16: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} +; X32-NEXT: vzeroupper +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_maskz_cvtepi32_epi16: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} +; X64-NEXT: vzeroupper +; X64-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__A to <8 x i32> + %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %0, <8 x i16> zeroinitializer, i8 %__M) + %2 = bitcast <8 x i16> %1 to <2 x i64> + ret <2 x i64> %2 +} + +define <2 x i64> @test_mm256_cvtepi64_epi32(<4 x i64> %__A) local_unnamed_addr #0 { +; X32-LABEL: 
test_mm256_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpmovqd %ymm0, %xmm0
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpmovqd %ymm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv.i = trunc <4 x i64> %__A to <4 x i32>
+ %0 = bitcast <4 x i32> %conv.i to <2 x i64>
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @test_mm256_mask_cvtepi64_epi32(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
+; X32-LABEL: test_mm256_mask_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovqd %ymm1, %xmm0 {%k1}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovqd %ymm1, %xmm0 {%k1}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv.i.i = trunc <4 x i64> %__A to <4 x i32>
+ %0 = bitcast <2 x i64> %__O to <4 x i32>
+ %1 = bitcast i8 %__M to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> %0
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm256_maskz_cvtepi64_epi32(i8 zeroext %__M, <4 x i64> %__A) {
+; X32-LABEL: test_mm256_maskz_cvtepi64_epi32:
+; X32: # %bb.0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z}
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_maskz_cvtepi64_epi32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z}
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %conv.i.i = trunc <4 x i64> %__A to <4 x i32>
+ %0 = bitcast i8 %__M to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %1 = select <4 x i1> %extract.i, <4 x i32> %conv.i.i, <4 x i32> zeroinitializer
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>)
 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
@@ -3426,5 +3557,6 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i
 declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
 declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
 declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
 
 !0 = !{i32 1}
-- 
2.7.4
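
Note: the tests above pin down the IR that clang currently emits for the AVX-512 truncating-move intrinsics. As a minimal sketch (not part of the patch; it assumes the standard intrinsic names from <immintrin.h>), the test_mm512_*cvtepi32_epi8 functions correspond to C source along these lines, compiled with clang -O2 -mavx512f:

#include <immintrin.h>

/* Plain truncation: 16 x i32 -> 16 x i8, lowered to vpmovdb. */
__m128i cvt(__m512i a) {
  return _mm512_cvtepi32_epi8(a);
}

/* Merge-masking: lanes whose mask bit is clear keep the value from src,
   which is why the IR forwards __O as the pass-through operand of
   llvm.x86.avx512.mask.pmov.db.512. */
__m128i cvt_mask(__m128i src, __mmask16 m, __m512i a) {
  return _mm512_mask_cvtepi32_epi8(src, m, a);
}

/* Zero-masking: lanes whose mask bit is clear are zeroed, matching the
   zeroinitializer pass-through in the maskz tests. */
__m128i cvt_maskz(__mmask16 m, __m512i a) {
  return _mm512_maskz_cvtepi32_epi8(m, a);
}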