From: Craig Topper Date: Tue, 18 Oct 2016 05:44:04 +0000 (+0000) Subject: [AVX-512] Add test case to check shuffle decoding for masked vpermilps for r284450. X-Git-Tag: llvmorg-4.0.0-rc1~6951 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=72b9f9864f15b72e61095e7524c8e6c3fb7742c4;p=platform%2Fupstream%2Fllvm.git [AVX-512] Add test case to check shuffle decoding for masked vpermilps for r284450. This is harder to do for vpermilpd as shuffle combining turns the constant vector into an immediate since all vpermilpd's inputs with constant vector can also be encoded with the immediate form. llvm-svn: 284455 --- diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index fb0af943..79d4094 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -4510,6 +4510,25 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, ret <16 x float> %res4 } +; Test case to make sure we can print shuffle decode comments for constant pool loads. +define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15] +; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12] +; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1 +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> , <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> , <16 x float> zeroinitializer, i16 %x3) + %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> , <16 x float> %x2, i16 -1) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res2, %res3 + ret <16 x float> %res4 +} + declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16) define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) {