From cf9c31550c61b04985e98941e499cfacc0b3e9bf Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 15 Jul 2016 11:40:27 +0000
Subject: [PATCH] [X86][AVX2] Added a memory version of test_mm256_broadcastsi128_si256

This should lower to vbroadcasti128

llvm-svn: 275552
---
 llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll | 28 ++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
index 56d77f5..430628c3 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -485,17 +485,37 @@ define <4 x double> @test_mm256_broadcastsd_pd(<4 x double> %a0) {
   ret <4 x double> %res
 }
 
-define <4 x i64> @test_mm256_broadcastsi128_si256(<4 x i64> %a0) {
+define <4 x i64> @test_mm256_broadcastsi128_si256(<2 x i64> %a0) {
 ; X32-LABEL: test_mm256_broadcastsi128_si256:
 ; X32: # BB#0:
-; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_broadcastsi128_si256:
 ; X64: # BB#0:
-; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT: retq
-  %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mm256_broadcastsi128_si256_mem(<2 x i64>* %p0) {
+; X32-LABEL: test_mm256_broadcastsi128_si256_mem:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovaps (%eax), %xmm0
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_broadcastsi128_si256_mem:
+; X64: # BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
+  %a0 = load <2 x i64>, <2 x i64>* %p0
+  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   ret <4 x i64> %res
 }
 
-- 
2.7.4