From 5a3a0231f485a76c7b2630ba059c7fc908b9d09e Mon Sep 17 00:00:00 2001 From: Asaf Badouh Date: Mon, 1 Feb 2016 15:48:21 +0000 Subject: [PATCH] [X86][AVX512VBMI] add encoding and intrinsics for Multishift Differential Revision: http://reviews.llvm.org/D16399 llvm-svn: 259363 --- llvm/include/llvm/IR/IntrinsicsX86.td | 12 ++ llvm/lib/Target/X86/X86ISelLowering.cpp | 1 + llvm/lib/Target/X86/X86ISelLowering.h | 2 + llvm/lib/Target/X86/X86InstrAVX512.td | 44 +++--- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 3 + llvm/lib/Target/X86/X86IntrinsicsInfo.h | 6 + llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll | 55 ++++--- llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll | 94 ++++++++---- llvm/test/MC/X86/avx512vbmi-encoding.s | 181 +++++++++++++++++++++++ 9 files changed, 332 insertions(+), 66 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index cedc548..f66e2c8 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2715,6 +2715,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_psrl_qi_512: GCCBuiltin<"__builtin_ia32_psrlqi512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmultishift_qb_128: + GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmultishift_qb_256: + GCCBuiltin<"__builtin_ia32_vpmultishiftqb256_mask">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmultishift_qb_512: + GCCBuiltin<"__builtin_ia32_vpmultishiftqb512_mask">, + Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, + llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; } // Permute let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 327d5fd..8f52296 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21151,6 +21151,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND"; case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS"; case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS"; + case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT"; } return nullptr; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index be135fc..f6f8bbe 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -327,6 +327,8 @@ namespace llvm { // Vector integer comparisons, the result is in a mask vector. PCMPEQM, PCMPGTM, + MULTISHIFT, + /// Vector comparison generating mask bits for fp and /// integer signed and unsigned data types. CMPM, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ef5275f..c3cc8fb 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3370,7 +3370,8 @@ multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, multiclass avx512_binop_rm2 opc, string OpcodeStr, OpndItins itins, SDNode OpNode,X86VectorVTInfo _Src, - X86VectorVTInfo _Dst, bit IsCommutable = 0> { + X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct, + bit IsCommutable = 0> { defm rr : AVX512_maskable opc, string OpcodeStr, OpndItins itins, defm rmb : AVX512_maskable, AVX512BIBase, EVEX_4V, EVEX_B; } @@ -3428,26 +3429,35 @@ defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_I defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, SSE_INTALU_ITINS_P, HasBWI, 1>; -multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, - SDNode OpNode, bit IsCommutable = 0> { - - defm NAME#Z : avx512_binop_rm2, - EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; - let Predicates = [HasVLX] in { +multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, + AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo, + SDNode OpNode, Predicate prd, bit IsCommutable = 0> { + let Predicates = [prd] in + defm NAME#Z : avx512_binop_rm2, + EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; + let Predicates = [HasVLX, prd] in { defm NAME#Z256 : avx512_binop_rm2, - EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; + _SrcVTInfo.info256, _DstVTInfo.info256, + v4i64x_info, IsCommutable>, + EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; defm NAME#Z128 : avx512_binop_rm2, + _SrcVTInfo.info128, _DstVTInfo.info128, + v2i64x_info, IsCommutable>, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; } } defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, - X86pmuldq, 1>,T8PD; -defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, - X86pmuludq, 1>; + avx512vl_i32_info, avx512vl_i64_info, + X86pmuldq, HasAVX512, 1>,T8PD; +defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, + avx512vl_i32_info, avx512vl_i64_info, + X86pmuludq, HasAVX512, 1>; +defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P, + avx512vl_i8_info, avx512vl_i8_info, + X86multishift, HasVBMI, 0>, T8PD; multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 92bc65c..680fa57 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -95,6 +95,9 @@ def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; +def X86multishift : SDNode<"X86ISD::MULTISHIFT", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameAs<1,2>]>>; def X86psign : SDNode<"X86ISD::PSIGN", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index be108da..7b35da1 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1342,6 +1342,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pmull_w_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0), X86_INTRINSIC_DATA(avx512_mask_pmull_w_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0), X86_INTRINSIC_DATA(avx512_mask_pmull_w_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0), + X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_128, INTR_TYPE_2OP_MASK, + X86ISD::MULTISHIFT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_256, INTR_TYPE_2OP_MASK, + X86ISD::MULTISHIFT, 0), + X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_512, INTR_TYPE_2OP_MASK, + X86ISD::MULTISHIFT, 0), X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_128, INTR_TYPE_2OP_MASK, X86ISD::PMULUDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_256, INTR_TYPE_2OP_MASK, diff --git a/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll index 06b3194..49fcaf4 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi-intrinsics.ll @@ -1,23 +1,40 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s -declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s +declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovq %rdi, %k1 +; CHECK-NEXT: vpermb %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpermb %zmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vpermb %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3) + %res2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + %res3 = add <64 x i8> %res, %res1 + %res4 = add <64 x i8> %res3, %res2 + ret <64 x i8> %res4 +} -define <64 x i8>@test_int_x86_avx512_mask_permvar_qi_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovq %rdi, %k1 -; CHECK-NEXT: vpermb %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vpermb %zmm1, %zmm0, %zmm3 {%k1} {z} -; CHECK-NEXT: vpermb %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %res = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) - %res1 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3) - %res2 = call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) - %res3 = add <64 x i8> %res, %res1 - %res4 = add <64 x i8> %res3, %res2 - ret <64 x i8> %res4 +declare <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8>@test_int_x86_avx512_mask_pmultishift_qb_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_512: +; CHECK: vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1} +; CHECK: vpmultishiftqb %zmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK: vpmultishiftqb %zmm1, %zmm0, %zmm0 +; CHECK: vpaddb %zmm3, %zmm2, %zmm1 +; CHECK: vpaddb %zmm0, %zmm1, %zmm0 + %res = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3) + %res2 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + %res3 = add <64 x i8> %res, %res1 + %res4 = add <64 x i8> %res3, %res2 + ret <64 x i8> %res4 } declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) diff --git a/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll index 8b909c1..690d160 100644 --- a/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmivl-intrinsics.ll @@ -1,40 +1,74 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s -declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl -mattr=+avx512vbmi --show-mc-encoding| FileCheck %s +declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermb %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpermb %xmm1, %xmm0, %xmm3 {%k1} {z} +; CHECK-NEXT: vpermb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1 +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: retq + %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) + %res3 = add <16 x i8> %res, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpermb %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpermb %ymm1, %ymm0, %ymm3 {%k1} {z} +; CHECK-NEXT: vpermb %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1 +; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) + %res2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) + %res3 = add <32 x i8> %res, %res1 + %res4 = add <32 x i8> %res3, %res2 + ret <32 x i8> %res4 +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) -define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpermb %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vpermb %xmm1, %xmm0, %xmm3 {%k1} {z} -; CHECK-NEXT: vpermb %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1 -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: retq - %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) +define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128: +; CHECK: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} +; CHECK: vpmultishiftqb %xmm1, %xmm0, %xmm3 {%k1} {z} +; CHECK: vpmultishiftqb %xmm1, %xmm0, %xmm0 +; CHECK: vpaddb %xmm3, %xmm2, %xmm1 +; CHECK: vpaddb %xmm0, %xmm1, %xmm0 + %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) %res3 = add <16 x i8> %res, %res1 %res4 = add <16 x i8> %res3, %res2 ret <16 x i8> %res4 } -declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) +declare <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) -define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_permvar_qi_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpermb %ymm1, %ymm0, %ymm2 {%k1} -; CHECK-NEXT: vpermb %ymm1, %ymm0, %ymm3 {%k1} {z} -; CHECK-NEXT: vpermb %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1 -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: retq - %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) - %res2 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) +define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256: +; CHECK: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} +; CHECK: vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} +; CHECK: vpmultishiftqb %ymm1, %ymm0, %ymm0 +; CHECK: vpaddb %ymm3, %ymm2, %ymm1 +; CHECK: vpaddb %ymm0, %ymm1, %ymm0 + %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) + %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) %res3 = add <32 x i8> %res, %res1 %res4 = add <32 x i8> %res3, %res2 ret <32 x i8> %res4 diff --git a/llvm/test/MC/X86/avx512vbmi-encoding.s b/llvm/test/MC/X86/avx512vbmi-encoding.s index 5e210a5..0409473 100644 --- a/llvm/test/MC/X86/avx512vbmi-encoding.s +++ b/llvm/test/MC/X86/avx512vbmi-encoding.s @@ -360,3 +360,184 @@ //CHECK: vpermi2b 4660(%rax,%r14,8), %zmm29, %zmm30 //CHECK: encoding: [0x62,0x22,0x15,0x40,0x75,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpmultishiftqb %xmm28, %xmm29, %xmm30 +//CHECK: vpmultishiftqb %xmm28, %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x02,0x95,0x00,0x83,0xf4] + + vpmultishiftqb %xmm28, %xmm29, %xmm30 {%k7} +//CHECK: vpmultishiftqb %xmm28, %xmm29, %xmm30 {%k7} +//CHECK: encoding: [0x62,0x02,0x95,0x07,0x83,0xf4] + + vpmultishiftqb %xmm28, %xmm29, %xmm30 {%k7} {z} +//CHECK: vpmultishiftqb %xmm28, %xmm29, %xmm30 {%k7} {z} +//CHECK: encoding: [0x62,0x02,0x95,0x87,0x83,0xf4] + + vpmultishiftqb (%rcx), %xmm29, %xmm30 +//CHECK: vpmultishiftqb (%rcx), %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x00,0x83,0x31] + + vpmultishiftqb 0x123(%rax,%r14,8), %xmm29, %xmm30 +//CHECK: vpmultishiftqb 291(%rax,%r14,8), %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x22,0x95,0x00,0x83,0xb4,0xf0,0x23,0x01,0x00,0x00] + + vpmultishiftqb (%rcx){1to2}, %xmm29, %xmm30 +//CHECK: vpmultishiftqb (%rcx){1to2}, %xmm29, %xmm30 + +//CHECK: encoding: [0x62,0x62,0x95,0x10,0x83,0x31] + + vpmultishiftqb 0x7f0(%rdx), %xmm29, %xmm30 +//CHECK: vpmultishiftqb 2032(%rdx), %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x00,0x83,0x72,0x7f] + + vpmultishiftqb 0x800(%rdx), %xmm29, %xmm30 +//CHECK: vpmultishiftqb 2048(%rdx), %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x00,0x83,0xb2,0x00,0x08,0x00,0x00] + + vpmultishiftqb -0x800(%rdx), %xmm29, %xmm30 +//CHECK: vpmultishiftqb -2048(%rdx), %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x00,0x83,0x72,0x80] + + vpmultishiftqb -0x810(%rdx), %xmm29, %xmm30 +//CHECK: vpmultishiftqb -2064(%rdx), %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x00,0x83,0xb2,0xf0,0xf7,0xff,0xff] + + vpmultishiftqb 0x3f8(%rdx){1to2}, %xmm29, %xmm30 +//CHECK: vpmultishiftqb 1016(%rdx){1to2}, %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x10,0x83,0x72,0x7f] + + vpmultishiftqb 0x400(%rdx){1to2}, %xmm29, %xmm30 +//CHECK: vpmultishiftqb 1024(%rdx){1to2}, %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x10,0x83,0xb2,0x00,0x04,0x00,0x00] + + vpmultishiftqb -0x400(%rdx){1to2}, %xmm29, %xmm30 +//CHECK: vpmultishiftqb -1024(%rdx){1to2}, %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x10,0x83,0x72,0x80] + + vpmultishiftqb -0x408(%rdx){1to2}, %xmm29, %xmm30 +//CHECK: vpmultishiftqb -1032(%rdx){1to2}, %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x10,0x83,0xb2,0xf8,0xfb,0xff,0xff] + + vpmultishiftqb %ymm28, %ymm29, %ymm30 +//CHECK: vpmultishiftqb %ymm28, %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x02,0x95,0x20,0x83,0xf4] + + vpmultishiftqb %ymm28, %ymm29, %ymm30 {%k7} +//CHECK: vpmultishiftqb %ymm28, %ymm29, %ymm30 {%k7} +//CHECK: encoding: [0x62,0x02,0x95,0x27,0x83,0xf4] + + vpmultishiftqb %ymm28, %ymm29, %ymm30 {%k7} {z} +//CHECK: vpmultishiftqb %ymm28, %ymm29, %ymm30 {%k7} {z} +//CHECK: encoding: [0x62,0x02,0x95,0xa7,0x83,0xf4] + + vpmultishiftqb (%rcx), %ymm29, %ymm30 +//CHECK: vpmultishiftqb (%rcx), %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x20,0x83,0x31] + + vpmultishiftqb 0x123(%rax,%r14,8), %ymm29, %ymm30 +//CHECK: vpmultishiftqb 291(%rax,%r14,8), %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x22,0x95,0x20,0x83,0xb4,0xf0,0x23,0x01,0x00,0x00] + + vpmultishiftqb (%rcx){1to4}, %ymm29, %ymm30 +//CHECK: vpmultishiftqb (%rcx){1to4}, %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x30,0x83,0x31] + + vpmultishiftqb 0xfe0(%rdx), %ymm29, %ymm30 +//CHECK: vpmultishiftqb 4064(%rdx), %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x20,0x83,0x72,0x7f] + + vpmultishiftqb 0x1000(%rdx), %ymm29, %ymm30 +//CHECK: vpmultishiftqb 4096(%rdx), %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x20,0x83,0xb2,0x00,0x10,0x00,0x00] + + vpmultishiftqb -0x1000(%rdx), %ymm29, %ymm30 +//CHECK: vpmultishiftqb -4096(%rdx), %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x20,0x83,0x72,0x80] + + vpmultishiftqb -0x1020(%rdx), %ymm29, %ymm30 +//CHECK: vpmultishiftqb -4128(%rdx), %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x20,0x83,0xb2,0xe0,0xef,0xff,0xff] + + vpmultishiftqb 0x3f8(%rdx){1to4}, %ymm29, %ymm30 +//CHECK: vpmultishiftqb 1016(%rdx){1to4}, %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x30,0x83,0x72,0x7f] + + vpmultishiftqb 0x400(%rdx){1to4}, %ymm29, %ymm30 +//CHECK: vpmultishiftqb 1024(%rdx){1to4}, %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x30,0x83,0xb2,0x00,0x04,0x00,0x00] + + vpmultishiftqb -0x400(%rdx){1to4}, %ymm29, %ymm30 +//CHECK: vpmultishiftqb -1024(%rdx){1to4}, %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x30,0x83,0x72,0x80] + + vpmultishiftqb -0x408(%rdx){1to4}, %ymm29, %ymm30 +//CHECK: vpmultishiftqb -1032(%rdx){1to4}, %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x62,0x95,0x30,0x83,0xb2,0xf8,0xfb,0xff,0xff] + + vpmultishiftqb 0x1234(%rax,%r14,8), %xmm29, %xmm30 +//CHECK: vpmultishiftqb 4660(%rax,%r14,8), %xmm29, %xmm30 +//CHECK: encoding: [0x62,0x22,0x95,0x00,0x83,0xb4,0xf0,0x34,0x12,0x00,0x00] + + vpmultishiftqb 0x1234(%rax,%r14,8), %ymm29, %ymm30 +//CHECK: vpmultishiftqb 4660(%rax,%r14,8), %ymm29, %ymm30 +//CHECK: encoding: [0x62,0x22,0x95,0x20,0x83,0xb4,0xf0,0x34,0x12,0x00,0x00] + + vpmultishiftqb %zmm28, %zmm29, %zmm30 +//CHECK: vpmultishiftqb %zmm28, %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x02,0x95,0x40,0x83,0xf4] + + vpmultishiftqb %zmm28, %zmm29, %zmm30 {%k7} +//CHECK: vpmultishiftqb %zmm28, %zmm29, %zmm30 {%k7} +//CHECK: encoding: [0x62,0x02,0x95,0x47,0x83,0xf4] + + vpmultishiftqb %zmm28, %zmm29, %zmm30 {%k7} {z} +//CHECK: vpmultishiftqb %zmm28, %zmm29, %zmm30 {%k7} {z} +//CHECK: encoding: [0x62,0x02,0x95,0xc7,0x83,0xf4] + + vpmultishiftqb (%rcx), %zmm29, %zmm30 +//CHECK: vpmultishiftqb (%rcx), %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x40,0x83,0x31] + + vpmultishiftqb 0x123(%rax,%r14,8), %zmm29, %zmm30 +//CHECK: vpmultishiftqb 291(%rax,%r14,8), %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x22,0x95,0x40,0x83,0xb4,0xf0,0x23,0x01,0x00,0x00] + + vpmultishiftqb (%rcx){1to8}, %zmm29, %zmm30 +//CHECK: vpmultishiftqb (%rcx){1to8}, %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x50,0x83,0x31] + + vpmultishiftqb 0x1fc0(%rdx), %zmm29, %zmm30 +//CHECK: vpmultishiftqb 8128(%rdx), %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x40,0x83,0x72,0x7f] + + vpmultishiftqb 0x2000(%rdx), %zmm29, %zmm30 +//CHECK: vpmultishiftqb 8192(%rdx), %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x40,0x83,0xb2,0x00,0x20,0x00,0x00] + + vpmultishiftqb -0x2000(%rdx), %zmm29, %zmm30 +//CHECK: vpmultishiftqb -8192(%rdx), %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x40,0x83,0x72,0x80] + + vpmultishiftqb -0x2040(%rdx), %zmm29, %zmm30 +//CHECK: vpmultishiftqb -8256(%rdx), %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x40,0x83,0xb2,0xc0,0xdf,0xff,0xff] + + vpmultishiftqb 0x3f8(%rdx){1to8}, %zmm29, %zmm30 +//CHECK: vpmultishiftqb 1016(%rdx){1to8}, %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x50,0x83,0x72,0x7f] + + vpmultishiftqb 0x400(%rdx){1to8}, %zmm29, %zmm30 +//CHECK: vpmultishiftqb 1024(%rdx){1to8}, %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x50,0x83,0xb2,0x00,0x04,0x00,0x00] + + vpmultishiftqb -0x400(%rdx){1to8}, %zmm29, %zmm30 +//CHECK: vpmultishiftqb -1024(%rdx){1to8}, %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x50,0x83,0x72,0x80] + + vpmultishiftqb -0x408(%rdx){1to8}, %zmm29, %zmm30 +//CHECK: vpmultishiftqb -1032(%rdx){1to8}, %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x62,0x95,0x50,0x83,0xb2,0xf8,0xfb,0xff,0xff] + + vpmultishiftqb 0x1234(%rax,%r14,8), %zmm29, %zmm30 +//CHECK: vpmultishiftqb 4660(%rax,%r14,8), %zmm29, %zmm30 +//CHECK: encoding: [0x62,0x22,0x95,0x40,0x83,0xb4,0xf0,0x34,0x12,0x00,0x00] + -- 2.7.4