// Enable Vector Neural Network Instructions
Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
+ // Enable AVX-512 Bit Algorithms (BITALG): tested via bit 12 of ECX, guarded
+ // by HasLeaf7 and HasAVX512Save exactly like the VNNI check (bit 11) above.
+ Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
+
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
+// AVX-512 Bit Algorithms. Enabling "avx512bitalg" sets HasBITALG and implies
+// FeatureBWI; the instructions gated on it use byte and word element types.
+def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
+ "Enable AVX-512 Bit Algorithms",
+ [FeatureBWI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
}
}
+ // With BITALG, population count on byte- and word-element vectors is marked
+ // Legal for the 512-, 256- and 128-bit types listed below.
+ // NOTE(review): this is not gated on VLX; selection of the 128/256-bit types
+ // without VLX presumably relies on the avx512_unary_lowering patterns
+ // widening to 512 bits -- confirm against that multiclass.
+ if (Subtarget.hasBITALG())
+ for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v32i8,
+ MVT::v16i16, MVT::v16i8, MVT::v8i16 })
+ setOperationAction(ISD::CTPOP, VT, Legal);
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds>;
+//===----------------------------------------------------------------------===//
+// Bit Algorithms
+//===----------------------------------------------------------------------===//
+
+defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop,
+ avx512vl_i8_info, HasBITALG>,
+ avx512_unary_lowering<ctpop, avx512vl_i8_info, HasBITALG>;
+defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
+ avx512vl_i16_info, HasBITALG>,
+ avx512_unary_lowering<ctpop, avx512vl_i16_info, HasBITALG>, VEX_W;
+
def PKU : Predicate<"Subtarget->hasPKU()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
+def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
def HasVAES : Predicate<"Subtarget->hasVAES()">;
HasADX = false;
HasPKU = false;
HasVNNI = false;
+ HasBITALG = false;
HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
/// Processor has AVX-512 Vector Neural Network Instructions
bool HasVNNI;
+ /// Processor has AVX-512 Bit Algorithms instructions
+ bool HasBITALG;
+
/// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
bool hasVNNI() const { return HasVNNI; }
+ bool hasBITALG() const { return HasBITALG; }
bool hasMPX() const { return HasMPX; }
bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
bool hasCLWB() const { return HasCLWB; }
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; AVX512VPOPCNTDQ-NEXT: vpmovqw %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG_NOVLX-LABEL: testv8i16:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: vzeroupper
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv8i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntw %xmm0, %xmm0
+; BITALG-NEXT: retq
%out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %in)
ret <8 x i16> %out
}
; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG_NOVLX-LABEL: testv16i8:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: vzeroupper
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv16i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
+; BITALG-NEXT: retq
%out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %in)
ret <16 x i8> %out
}
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG_NOVLX-LABEL: testv16i16:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv16i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntw %ymm0, %ymm0
+; BITALG-NEXT: retq
%out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %in)
ret <16 x i16> %out
}
; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG_NOVLX-LABEL: testv32i8:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv32i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
+; BITALG-NEXT: retq
%out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
ret <32 x i8> %out
}
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ --check-prefix=AVX512VPOPCNTDQ-NOBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ --check-prefix=AVX512VPOPCNTDQ-BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=AVX512 --check-prefix=BITALG
define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512F-LABEL: testv8i64:
; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT: retq
+;
+; BITALG-LABEL: testv32i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG-NEXT: retq
%out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
ret <32 x i16> %out
}
; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512VPOPCNTDQ-BW-NEXT: retq
+;
+; BITALG-LABEL: testv64i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG-NEXT: retq
%out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
ret <64 x i8> %out
}
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
;
; Just one 32-bit run to make sure we do reasonable things for i64 tzcnt.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE41
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; BITALG_NOVLX-LABEL: testv8i16:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG_NOVLX-NEXT: vpsubw %xmm0, %xmm1, %xmm1
+; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; BITALG_NOVLX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: vzeroupper
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv8i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG-NEXT: vpsubw %xmm0, %xmm1, %xmm1
+; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
+; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; BITALG-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntw %xmm0, %xmm0
+; BITALG-NEXT: retq
+;
; X32-SSE-LABEL: testv8i16:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; BITALG_NOVLX-LABEL: testv16i8:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG_NOVLX-NEXT: vpsubb %xmm0, %xmm1, %xmm1
+; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; BITALG_NOVLX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: vzeroupper
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv16i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG-NEXT: vpsubb %xmm0, %xmm1, %xmm1
+; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
+; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; BITALG-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
+; BITALG-NEXT: retq
+;
; X32-SSE-LABEL: testv16i8:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pxor %xmm1, %xmm1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
;
; Just one 32-bit run to make sure we do reasonable things for i64 tzcnt.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-AVX --check-prefix=X32-AVX2
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; BITALG_NOVLX-LABEL: testv16i16:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG_NOVLX-NEXT: vpsubw %ymm0, %ymm1, %ymm1
+; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; BITALG_NOVLX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv16i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG-NEXT: vpsubw %ymm0, %ymm1, %ymm1
+; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0
+; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; BITALG-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; BITALG-NEXT: vpopcntw %ymm0, %ymm0
+; BITALG-NEXT: retq
+;
; X32-AVX-LABEL: testv16i16:
; X32-AVX: # BB#0:
; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; BITALG_NOVLX-LABEL: testv32i8:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG_NOVLX-NEXT: vpsubb %ymm0, %ymm1, %ymm1
+; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; BITALG_NOVLX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv32i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG-NEXT: vpsubb %ymm0, %ymm1, %ymm1
+; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0
+; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; BITALG-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
+; BITALG-NEXT: retq
+;
; X32-AVX-LABEL: testv32i8:
; X32-AVX: # BB#0:
; X32-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CDBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512CD-LABEL: testv8i64:
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm1, %zmm1
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG-LABEL: testv32i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG-NEXT: vpsubw %zmm0, %zmm1, %zmm1
+; BITALG-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; BITALG-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; BITALG-NEXT: vpaddw %zmm1, %zmm0, %zmm0
+; BITALG-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG-NEXT: retq
%out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 0)
ret <32 x i16> %out
}
; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG-LABEL: testv64i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; BITALG-NEXT: vpsubb %zmm0, %zmm1, %zmm1
+; BITALG-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; BITALG-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; BITALG-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG-NEXT: retq
%out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 0)
ret <64 x i8> %out
}
--- /dev/null
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bitalg --show-encoding < %s | FileCheck %s
+
+// CHECK: vpopcntb %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x54,0xef]
+ vpopcntb %zmm23, %zmm21
+
+// CHECK: vpopcntw %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x54,0xef]
+ vpopcntw %zmm23, %zmm21
+
+// CHECK: vpopcntb %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x54,0xcb]
+ vpopcntb %zmm3, %zmm1 {%k2}
+
+// CHECK: vpopcntw %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x54,0xcb]
+ vpopcntw %zmm3, %zmm1 {%k2}
+
+// CHECK: vpopcntb (%rcx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x54,0x09]
+ vpopcntb (%rcx), %zmm1
+
+// CHECK: vpopcntb -256(%rsp), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x54,0x4c,0x24,0xfc]
+ vpopcntb -256(%rsp), %zmm1
+
+// CHECK: vpopcntb 256(%rsp), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x54,0x4c,0x24,0x04]
+ vpopcntb 256(%rsp), %zmm1
+
+// CHECK: vpopcntb 268435456(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntb 268435456(%rcx,%r14,8), %zmm1
+
+// CHECK: vpopcntb -536870912(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntb -536870912(%rcx,%r14,8), %zmm1
+
+// CHECK: vpopcntb -536870910(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntb -536870910(%rcx,%r14,8), %zmm1
+
+// CHECK: vpopcntw (%rcx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x54,0x09]
+ vpopcntw (%rcx), %zmm1
+
+// CHECK: vpopcntw -256(%rsp), %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x54,0x4c,0x24,0xfc]
+ vpopcntw -256(%rsp), %zmm1
+
+// CHECK: vpopcntw 256(%rsp), %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x54,0x4c,0x24,0x04]
+ vpopcntw 256(%rsp), %zmm1
+
+// CHECK: vpopcntw 268435456(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntw 268435456(%rcx,%r14,8), %zmm1
+
+// CHECK: vpopcntw -536870912(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntw -536870912(%rcx,%r14,8), %zmm1
+
+// CHECK: vpopcntw -536870910(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntw -536870910(%rcx,%r14,8), %zmm1
+
+// CHECK: vpopcntb (%rcx), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x54,0x29]
+ vpopcntb (%rcx), %zmm21 {%k2}
+
+// CHECK: vpopcntb -256(%rsp), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x54,0x6c,0x24,0xfc]
+ vpopcntb -256(%rsp), %zmm21 {%k2}
+
+// CHECK: vpopcntb 256(%rsp), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x54,0x6c,0x24,0x04]
+ vpopcntb 256(%rsp), %zmm21 {%k2}
+
+// CHECK: vpopcntb 268435456(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntb 268435456(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpopcntb -536870912(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntb -536870912(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpopcntb -536870910(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntb -536870910(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpopcntw (%rcx), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x54,0x29]
+ vpopcntw (%rcx), %zmm21 {%k2}
+
+// CHECK: vpopcntw -256(%rsp), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x54,0x6c,0x24,0xfc]
+ vpopcntw -256(%rsp), %zmm21 {%k2}
+
+// CHECK: vpopcntw 256(%rsp), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x54,0x6c,0x24,0x04]
+ vpopcntw 256(%rsp), %zmm21 {%k2}
+
+// CHECK: vpopcntw 268435456(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntw 268435456(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpopcntw -536870912(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntw -536870912(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpopcntw -536870910(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntw -536870910(%rcx,%r14,8), %zmm21 {%k2}
+
--- /dev/null
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl,+avx512bitalg --show-encoding < %s | FileCheck %s
+
+// CHECK: vpopcntb %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x54,0xef]
+ vpopcntb %xmm23, %xmm21
+
+// CHECK: vpopcntw %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x54,0xef]
+ vpopcntw %xmm23, %xmm21
+
+// CHECK: vpopcntb %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x0a,0x54,0xcb]
+ vpopcntb %xmm3, %xmm1 {%k2}
+
+// CHECK: vpopcntw %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x0a,0x54,0xcb]
+ vpopcntw %xmm3, %xmm1 {%k2}
+
+// CHECK: vpopcntb (%rcx), %xmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x54,0x09]
+ vpopcntb (%rcx), %xmm1
+
+// CHECK: vpopcntb -64(%rsp), %xmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x54,0x4c,0x24,0xfc]
+ vpopcntb -64(%rsp), %xmm1
+
+// CHECK: vpopcntb 64(%rsp), %xmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x54,0x4c,0x24,0x04]
+ vpopcntb 64(%rsp), %xmm1
+
+// CHECK: vpopcntb 268435456(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntb 268435456(%rcx,%r14,8), %xmm1
+
+// CHECK: vpopcntb -536870912(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntb -536870912(%rcx,%r14,8), %xmm1
+
+// CHECK: vpopcntb -536870910(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntb -536870910(%rcx,%r14,8), %xmm1
+
+// CHECK: vpopcntw (%rcx), %xmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x54,0x09]
+ vpopcntw (%rcx), %xmm1
+
+// CHECK: vpopcntw -64(%rsp), %xmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x54,0x4c,0x24,0xfc]
+ vpopcntw -64(%rsp), %xmm1
+
+// CHECK: vpopcntw 64(%rsp), %xmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x54,0x4c,0x24,0x04]
+ vpopcntw 64(%rsp), %xmm1
+
+// CHECK: vpopcntw 268435456(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntw 268435456(%rcx,%r14,8), %xmm1
+
+// CHECK: vpopcntw -536870912(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntw -536870912(%rcx,%r14,8), %xmm1
+
+// CHECK: vpopcntw -536870910(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntw -536870910(%rcx,%r14,8), %xmm1
+
+// CHECK: vpopcntb (%rcx), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x54,0x29]
+ vpopcntb (%rcx), %xmm21 {%k2}
+
+// CHECK: vpopcntb -64(%rsp), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x54,0x6c,0x24,0xfc]
+ vpopcntb -64(%rsp), %xmm21 {%k2}
+
+// CHECK: vpopcntb 64(%rsp), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x54,0x6c,0x24,0x04]
+ vpopcntb 64(%rsp), %xmm21 {%k2}
+
+// CHECK: vpopcntb 268435456(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntb 268435456(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpopcntb -536870912(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntb -536870912(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpopcntb -536870910(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntb -536870910(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpopcntw (%rcx), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x54,0x29]
+ vpopcntw (%rcx), %xmm21 {%k2}
+
+// CHECK: vpopcntw -64(%rsp), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x54,0x6c,0x24,0xfc]
+ vpopcntw -64(%rsp), %xmm21 {%k2}
+
+// CHECK: vpopcntw 64(%rsp), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x54,0x6c,0x24,0x04]
+ vpopcntw 64(%rsp), %xmm21 {%k2}
+
+// CHECK: vpopcntw 268435456(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntw 268435456(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpopcntw -536870912(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntw -536870912(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpopcntw -536870910(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntw -536870910(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpopcntb %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x54,0xef]
+ vpopcntb %ymm23, %ymm21
+
+// CHECK: vpopcntw %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x54,0xef]
+ vpopcntw %ymm23, %ymm21
+
+// CHECK: vpopcntb %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x2a,0x54,0xcb]
+ vpopcntb %ymm3, %ymm1 {%k2}
+
+// CHECK: vpopcntw %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x2a,0x54,0xcb]
+ vpopcntw %ymm3, %ymm1 {%k2}
+
+// CHECK: vpopcntb (%rcx), %ymm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x54,0x09]
+ vpopcntb (%rcx), %ymm1
+
+// CHECK: vpopcntb -128(%rsp), %ymm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x54,0x4c,0x24,0xfc]
+ vpopcntb -128(%rsp), %ymm1
+
+// CHECK: vpopcntb 128(%rsp), %ymm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x54,0x4c,0x24,0x04]
+ vpopcntb 128(%rsp), %ymm1
+
+// CHECK: vpopcntb 268435456(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntb 268435456(%rcx,%r14,8), %ymm1
+
+// CHECK: vpopcntb -536870912(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntb -536870912(%rcx,%r14,8), %ymm1
+
+// CHECK: vpopcntb -536870910(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntb -536870910(%rcx,%r14,8), %ymm1
+
+// CHECK: vpopcntw (%rcx), %ymm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x54,0x09]
+ vpopcntw (%rcx), %ymm1
+
+// CHECK: vpopcntw -128(%rsp), %ymm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x54,0x4c,0x24,0xfc]
+ vpopcntw -128(%rsp), %ymm1
+
+// CHECK: vpopcntw 128(%rsp), %ymm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x54,0x4c,0x24,0x04]
+ vpopcntw 128(%rsp), %ymm1
+
+// CHECK: vpopcntw 268435456(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntw 268435456(%rcx,%r14,8), %ymm1
+
+// CHECK: vpopcntw -536870912(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntw -536870912(%rcx,%r14,8), %ymm1
+
+// CHECK: vpopcntw -536870910(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntw -536870910(%rcx,%r14,8), %ymm1
+
+// CHECK: vpopcntb (%rcx), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x54,0x29]
+ vpopcntb (%rcx), %ymm21 {%k2}
+
+// CHECK: vpopcntb -128(%rsp), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x54,0x6c,0x24,0xfc]
+ vpopcntb -128(%rsp), %ymm21 {%k2}
+
+// CHECK: vpopcntb 128(%rsp), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x54,0x6c,0x24,0x04]
+ vpopcntb 128(%rsp), %ymm21 {%k2}
+
+// CHECK: vpopcntb 268435456(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntb 268435456(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpopcntb -536870912(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntb -536870912(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpopcntb -536870910(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntb -536870910(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpopcntw (%rcx), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x54,0x29]
+ vpopcntw (%rcx), %ymm21 {%k2}
+
+// CHECK: vpopcntw -128(%rsp), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x54,0x6c,0x24,0xfc]
+ vpopcntw -128(%rsp), %ymm21 {%k2}
+
+// CHECK: vpopcntw 128(%rsp), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x54,0x6c,0x24,0x04]
+ vpopcntw 128(%rsp), %ymm21 {%k2}
+
+// CHECK: vpopcntw 268435456(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10]
+ vpopcntw 268435456(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpopcntw -536870912(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0]
+ vpopcntw -536870912(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpopcntw -536870910(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
+ vpopcntw -536870910(%rcx,%r14,8), %ymm21 {%k2}
+