From 5401bae5230c59e887bbc987da85a360e8e21973 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 20 Nov 2016 15:24:38 +0000
Subject: [PATCH] [X86][AVX512] Add support for VBMI VPERMV3 target shuffle
 combines

llvm-svn: 287496
---
 llvm/lib/Target/X86/X86ISelLowering.cpp            |  4 ++-
 .../X86/vector-shuffle-combining-avx512vbmi.ll     | 32 ++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 793176e..6560fe3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25984,7 +25984,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
         (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8f32 ||
          MaskVT == MVT::v8i32)) ||
        (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
-       (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16))) {
+       (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
+       (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
+       (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
     MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits);
     MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts);
     SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true);
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
index d589a92..849c277 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -125,3 +125,35 @@ define <16 x i8> @combine_vpermt2var_vpermi2var_16i8_as_vperm2(<16 x i8> %x0, <16 x i8> %x1)
   %res1 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> , <16 x i8> %res0, <16 x i8> %res0, i16 -1)
   ret <16 x i8> %res1
 }
+define <32 x i8> @combine_vpermi2var_32i8_as_vperm2(<32 x i8> %x0, <32 x i8> %x1) {
+; X32-LABEL: combine_vpermi2var_32i8_as_vperm2:
+; X32:       # BB#0:
+; X32-NEXT:    vmovdqu8 {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
+; X32-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_vpermi2var_32i8_as_vperm2:
+; X64:       # BB#0:
+; X64-NEXT:    vmovdqu8 {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
+; X64-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
+; X64-NEXT:    retq
+  %res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> 
+  %res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %res0, <32 x i8> , <32 x i8> %x1, i32 -1)
+  ret <32 x i8> %res1
+}
+define <64 x i8> @combine_vpermi2var_64i8_as_vperm2(<64 x i8> %x0, <64 x i8> %x1) {
+; X32-LABEL: combine_vpermi2var_64i8_as_vperm2:
+; X32:       # BB#0:
+; X32-NEXT:    vmovdqu8 {{.*#+}} zmm2 = [0,80,1,70,2,54,3,49,4,36,5,23,6,18,7,5,0,90,1,100,2,110,3,120,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
+; X32-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_vpermi2var_64i8_as_vperm2:
+; X64:       # BB#0:
+; X64-NEXT:    vmovdqu8 {{.*#+}} zmm2 = [0,80,1,70,2,54,3,49,4,36,5,23,6,18,7,5,0,90,1,100,2,110,3,120,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
+; X64-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
+; X64-NEXT:    retq
+  %res0 = shufflevector <64 x i8> %x0, <64 x i8> %x1, <64 x i32> 
+  %res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %res0, <64 x i8> , <64 x i8> %x1, i64 -1)
+  ret <64 x i8> %res1
+}
-- 
2.7.4
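
For context on what the new combine emits: VPERMT2B/VPERMI2B (the VPERMV3 forms appearing in the CHECK lines above) perform a variable byte permute that draws from two source vectors, so a chain of byte shuffles with a constant combined mask can lower to a single instruction. Below is a minimal standalone sketch of those semantics, not part of the patch, assuming an AVX-512 VBMI-capable CPU and toolchain (e.g. clang -O2 -mavx512vbmi) and using the documented _mm512_permutex2var_epi8 intrinsic:

// Illustrative sketch only -- not part of the patch. Demonstrates the
// two-source variable byte permute (VPERMV3, i.e. VPERMT2B/VPERMI2B)
// that the combine above now emits for v32i8/v64i8 shuffle masks.
#include <immintrin.h>
#include <cstdio>

int main() {
  __m512i a = _mm512_set1_epi8(0x11); // first source (like %zmm0)
  __m512i b = _mm512_set1_epi8(0x22); // second source (like %zmm1)

  // Build an index vector that interleaves the low 32 bytes of a and b.
  // For a 512-bit VPERMT2B each index byte uses its low 7 bits: bit 6
  // selects the source (0 = a, 1 = b), bits 5:0 select the byte lane.
  char idx[64];
  for (int i = 0; i < 64; ++i)
    idx[i] = (i & 1) ? (char)(64 + i / 2) : (char)(i / 2);
  __m512i index = _mm512_loadu_si512(idx);

  // dst[i] = (idx[i] & 64) ? b[idx[i] & 63] : a[idx[i] & 63]
  __m512i dst = _mm512_permutex2var_epi8(a, index, b);

  char out[64];
  _mm512_storeu_si512(out, dst);
  for (int i = 0; i < 64; ++i) // expect alternating 11 22 11 22 ...
    std::printf("%02x%c", (unsigned char)out[i], (i % 16 == 15) ? '\n' : ' ');
  return 0;
}

When the index vector is a compile-time constant, as in the tests above, folding the whole shuffle chain into one vpermt2b with a loaded constant mask is exactly the saving the new v32i8/v64i8 cases in combineX86ShuffleChain capture.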