From fcda45a9eb88b2bd0500c71554f560700f59dedd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 1 Jul 2019 07:59:42 +0000 Subject: [PATCH] [X86] Add more load folding tests for vcvt(t)ps2(u)qq showing missed foldings. NFC llvm-svn: 364730 --- llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll | 747 +++++++++++++++++++++++++ llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll | 152 +++++ llvm/test/CodeGen/X86/vec_fp_to_int.ll | 152 +++++ 3 files changed, 1051 insertions(+) diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 5925fb1..4197f57 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -122,6 +122,193 @@ define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i6 ret <2 x i64> %res2 } +define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load(<2 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load: +; X64: # %bb.0: +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: 
test_int_x86_avx512_mask_cvt_ps2qq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] +; X86-NEXT: # xmm1 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f] +; X64-NEXT: # xmm1 = mem[0],zero +; X64-NEXT: vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load(<2 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvtps2qq %xmm0, 
%xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + + +define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_2(<2 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] +; X86-NEXT: # xmm1 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; 
X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f] +; X64-NEXT: # xmm1 = mem[0],zero +; X64-NEXT: vcvtps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2(<2 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvtps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_3(<4 x float>* %p) { +; X86-LABEL: 
test_int_x86_avx512_cvt_ps2qq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3(<4 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; 
+; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8) define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) { @@ -170,6 +357,192 @@ define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i ret <2 x i64> %res2 } +define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load(<2 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load: +; X64: # %bb.0: +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load: +; X86: # %bb.0: +; X86-NEXT: 
movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] +; X86-NEXT: # xmm1 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f] +; X64-NEXT: # xmm1 = mem[0],zero +; X64-NEXT: vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load(<2 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0] +; 
X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_2(<2 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] +; X86-NEXT: # xmm1 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2: 
+; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f] +; X64-NEXT: # xmm1 = mem[0],zero +; X64-NEXT: vcvtps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2(<2 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvtps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_3(<4 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3(<4 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3: 
+; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8) define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) { @@ -389,6 +762,193 @@ define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i ret <2 x i64> %res2 } +define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load(<2 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load: +; X64: # %bb.0: +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 
+; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] +; X86-NEXT: # xmm1 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f] +; X64-NEXT: # xmm1 = mem[0],zero +; X64-NEXT: vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load(<2 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x 
float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + + +define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_2(<2 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] +; X86-NEXT: # xmm1 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: 
[0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f] +; X64-NEXT: # xmm1 = mem[0],zero +; X64-NEXT: vcvttps2qq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2(<2 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvttps2qq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_3(<4 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; 
X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3(<4 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: 
[0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8) define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) { @@ -514,6 +1074,193 @@ define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x ret <2 x i64> %res2 } +define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load(<2 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load: +; X64: # %bb.0: +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX 
Compression encoding: [0xc5,0xfb,0x10,0x08] +; X86-NEXT: # xmm1 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f] +; X64-NEXT: # xmm1 = mem[0],zero +; X64-NEXT: vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load(<2 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x 
float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + + +define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_2(<2 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] +; X86-NEXT: # xmm1 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; 
X64-NEXT: vmovsd (%rdi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0f] +; X64-NEXT: # xmm1 = mem[0],zero +; X64-NEXT: vcvttps2uqq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2(<2 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-NEXT: # xmm0 = mem[0],zero +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-NEXT: # xmm0 = mem[0],zero +; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <2 x float>, <2 x float>* %p + %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_3(<4 x float>* %p) { +; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvttps2uqq 
(%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3(<4 x float>* %p, i8 %mask) { +; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3: +; X64: # %bb.0: +; X64-NEXT: kmovw %esi, %k1 # encoding: 
[0xc5,0xf8,0x92,0xce] +; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %x0 = load <4 x float>, <4 x float>* %p + %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8) define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) { diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll b/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll index 94b860c..08cb37e 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll @@ -2642,3 +2642,155 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) { %cvt = fptoui <16 x float> %a to <16 x i8> ret <16 x i8> %cvt } + +define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) { +; SSE-LABEL: fptosi_2f32_to_2i64_load: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: cvttss2si %xmm1, %rax +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE-NEXT: cvttss2si %xmm1, %rax +; SSE-NEXT: movq %rax, %xmm1 +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; VEX-LABEL: fptosi_2f32_to_2i64_load: +; VEX: # %bb.0: +; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; VEX-NEXT: vcvttss2si %xmm0, %rax +; VEX-NEXT: vmovq %rax, %xmm1 +; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; VEX-NEXT: vcvttss2si %xmm0, %rax +; VEX-NEXT: vmovq %rax, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptosi_2f32_to_2i64_load: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F-NEXT: vcvttss2si %xmm0, %rax +; AVX512F-NEXT: vmovq %rax, %xmm1 +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512F-NEXT: vcvttss2si %xmm0, %rax +; 
AVX512F-NEXT: vmovq %rax, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptosi_2f32_to_2i64_load: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512VL-NEXT: vcvttss2si %xmm0, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm1 +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512VL-NEXT: vcvttss2si %xmm0, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq + %a = load <2 x float>, <2 x float>* %x + %b = fptosi <2 x float> %a to <2 x i64> + ret <2 x i64> %b +} + +define <2 x i64> @fptoui_2f32_to_2i64_load(<2 x float>* %x) { +; SSE-LABEL: fptoui_2f32_to_2i64_load: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: subss %xmm2, %xmm0 +; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: cvttss2si %xmm1, %rdx +; SSE-NEXT: ucomiss %xmm2, %xmm1 +; SSE-NEXT: cmovaeq %rax, %rdx +; SSE-NEXT: movq %rdx, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE-NEXT: movaps %xmm1, %xmm3 +; SSE-NEXT: subss %xmm2, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: cvttss2si %xmm1, %rcx +; SSE-NEXT: ucomiss %xmm2, %xmm1 +; SSE-NEXT: cmovaeq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 +; 
SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; VEX-LABEL: fptoui_2f32_to_2i64_load: +; VEX: # %bb.0: +; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcvttss2si %xmm2, %rax +; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rdx +; VEX-NEXT: vucomiss %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rdx +; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcvttss2si %xmm3, %rax +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: vucomiss %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptoui_2f32_to_2i64_load: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F-NEXT: vcvttss2usi %xmm0, %rax +; AVX512F-NEXT: vmovq %rax, %xmm1 +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512F-NEXT: vcvttss2usi %xmm0, %rax +; AVX512F-NEXT: vmovq %rax, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptoui_2f32_to_2i64_load: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm1 +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: 
vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq + %a = load <2 x float>, <2 x float>* %x + %b = fptoui <2 x float> %a to <2 x i64> + ret <2 x i64> %b +} diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll index e3c9a54..d4470d4 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -2924,3 +2924,155 @@ define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) { %cvt = fptoui <16 x float> %a to <16 x i8> ret <16 x i8> %cvt } + +define <2 x i64> @fptosi_2f32_to_2i64_load(<2 x float>* %x) { +; SSE-LABEL: fptosi_2f32_to_2i64_load: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: cvttss2si %xmm1, %rax +; SSE-NEXT: movq %rax, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE-NEXT: cvttss2si %xmm1, %rax +; SSE-NEXT: movq %rax, %xmm1 +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; VEX-LABEL: fptosi_2f32_to_2i64_load: +; VEX: # %bb.0: +; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; VEX-NEXT: vcvttss2si %xmm0, %rax +; VEX-NEXT: vmovq %rax, %xmm1 +; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; VEX-NEXT: vcvttss2si %xmm0, %rax +; VEX-NEXT: vmovq %rax, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptosi_2f32_to_2i64_load: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F-NEXT: vcvttss2si %xmm0, %rax +; AVX512F-NEXT: vmovq %rax, %xmm1 +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512F-NEXT: vcvttss2si %xmm0, %rax +; AVX512F-NEXT: vmovq %rax, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptosi_2f32_to_2i64_load: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovsd 
{{.*#+}} xmm0 = mem[0],zero +; AVX512VL-NEXT: vcvttss2si %xmm0, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm1 +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512VL-NEXT: vcvttss2si %xmm0, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: retq + %a = load <2 x float>, <2 x float>* %x + %b = fptosi <2 x float> %a to <2 x i64> + ret <2 x i64> %b +} + +define <2 x i64> @fptoui_2f32_to_2i64_load(<2 x float>* %x) { +; SSE-LABEL: fptoui_2f32_to_2i64_load: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: subss %xmm2, %xmm0 +; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: cvttss2si %xmm1, %rdx +; SSE-NEXT: ucomiss %xmm2, %xmm1 +; SSE-NEXT: cmovaeq %rax, %rdx +; SSE-NEXT: movq %rdx, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE-NEXT: movaps %xmm1, %xmm3 +; SSE-NEXT: subss %xmm2, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: cvttss2si %xmm1, %rcx +; SSE-NEXT: ucomiss %xmm2, %xmm1 +; SSE-NEXT: cmovaeq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: retq +; +; VEX-LABEL: fptoui_2f32_to_2i64_load: +; VEX: # %bb.0: +; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; VEX-NEXT: vmovss {{.*#+}} xmm1 = 
mem[0],zero,zero,zero +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcvttss2si %xmm2, %rax +; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rdx +; VEX-NEXT: vucomiss %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rdx +; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcvttss2si %xmm3, %rax +; VEX-NEXT: xorq %rcx, %rax +; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: vucomiss %xmm1, %xmm0 +; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm0 +; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; VEX-NEXT: retq +; +; AVX512F-LABEL: fptoui_2f32_to_2i64_load: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F-NEXT: vcvttss2usi %xmm0, %rax +; AVX512F-NEXT: vmovq %rax, %xmm1 +; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512F-NEXT: vcvttss2usi %xmm0, %rax +; AVX512F-NEXT: vmovq %rax, %xmm0 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptoui_2f32_to_2i64_load: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm1 +; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm0 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load: +; AVX512VLDQ: # %bb.0: +; AVX512VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0 +; 
AVX512VLDQ-NEXT: retq + %a = load <2 x float>, <2 x float>* %x + %b = fptoui <2 x float> %a to <2 x i64> + ret <2 x i64> %b +} -- 2.7.4