From 3c7e94208ad8145c4edb5dab15f60603b30fe80c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 4 Mar 2016 15:19:42 +0000
Subject: [PATCH] [X86][AVX512] Added some basic X86ISD::VPERMV3 shuffle combining tests

None of these actually combine yet as we haven't enabled X86ISD::VPERMV3 for target shuffle combining

llvm-svn: 262718
---
NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed and whose constant shuffle-mask operands had been stripped from the
vpermt2var calls. Each mask below was rebuilt from the constant-pool vector in
the matching "CHECK-NEXT: vmovdqa64/vmovdqa32/vmovdqu16 ... = [...]" line of the
same test; confirm against llvm-svn r262718 before applying. Each test applies
a lane-reversing permute and then a second permute whose two data sources are
both the first result, so the pair is expected to fold to an identity once
VPERMV3 combining is enabled. The author e-mail address was absent from the
recovered From: header and has not been guessed. CHECK-line alignment follows
the usual update_llc_test_checks.py layout - TODO confirm if the hunk context
fails to match.

 .../X86/vector-shuffle-combining-avx512bw.ll       | 72 ++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index f0b4080..022dbca 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -3,6 +3,78 @@
 
 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
 
+declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
+declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
+
+declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
+; CHECK-LABEL: combine_vpermt2var_8f64_identity:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
+; CHECK-NEXT:    vpermt2pd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x0, <8 x double> %x1, i8 -1)
+  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, <8 x double> %res0, i8 -1)
+  ret <8 x double> %res1
+}
+
+define <8 x i64> @combine_vpermt2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
+; CHECK-LABEL: combine_vpermt2var_8i64_identity:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
+; CHECK-NEXT:    vpermt2q %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x0, <8 x i64> %x1, i8 -1)
+  %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, <8 x i64> %res0, i8 -1)
+  ret <8 x i64> %res1
+}
+
+define <16 x float> @combine_vpermt2var_16f32_identity(<16 x float> %x0, <16 x float> %x1) {
+; CHECK-LABEL: combine_vpermt2var_16f32_identity:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; CHECK-NEXT:    vpermt2ps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %x0, <16 x float> %x1, i16 -1)
+  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 11, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x float> %res0, <16 x float> %res0, i16 -1)
+  ret <16 x float> %res1
+}
+
+define <16 x i32> @combine_vpermt2var_16i32_identity(<16 x i32> %x0, <16 x i32> %x1) {
+; CHECK-LABEL: combine_vpermt2var_16i32_identity:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; CHECK-NEXT:    vpermt2d %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %x0, <16 x i32> %x1, i16 -1)
+  %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 11, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x i32> %res0, <16 x i32> %res0, i16 -1)
+  ret <16 x i32> %res1
+}
+
+define <32 x i16> @combine_vpermt2var_32i16_identity(<32 x i16> %x0, <32 x i16> %x1) {
+; CHECK-LABEL: combine_vpermt2var_32i16_identity:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqu16 {{.*#+}} zmm2 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpermt2w %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vmovdqu16 {{.*#+}} zmm1 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
+; CHECK-NEXT:    vpermt2w %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %x0, <32 x i16> %x1, i32 -1)
+  %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> <i16 63, i16 30, i16 61, i16 28, i16 59, i16 26, i16 57, i16 24, i16 55, i16 22, i16 53, i16 20, i16 51, i16 18, i16 49, i16 16, i16 47, i16 46, i16 13, i16 44, i16 11, i16 42, i16 9, i16 40, i16 7, i16 38, i16 5, i16 36, i16 3, i16 34, i16 1, i16 32>, <32 x i16> %res0, <32 x i16> %res0, i32 -1)
+  ret <32 x i16> %res1
+}
+
 define <64 x i8> @combine_pshufb_identity(<64 x i8> %x0) {
 ; CHECK-LABEL: combine_pshufb_identity:
 ; CHECK:       # BB#0:
-- 
2.7.4