I think that when this instruction was first published it was available only on a Knights CPU, and thus the VLX version was missing.
llvm-svn: 320910
sub_xmm)>;
}
-multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, OpndItins itins,
- Predicate prd> {
- defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, itins, prd>;
+// Use the 512-bit version to implement the 128/256-bit operations when VLX is not available.
+multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo _, Predicate prd> {
+ let Predicates = [prd, NoVLX] in {
+ def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(InstrStr # "Zrr")
+ (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
+ _.info256.RC:$src1,
+ _.info256.SubRegIdx)),
+ _.info256.SubRegIdx)>;
+
+ def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(InstrStr # "Zrr")
+ (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
+ _.info128.RC:$src1,
+ _.info128.SubRegIdx)),
+ _.info128.SubRegIdx)>;
+ }
}
// FIXME: Is there a better scheduler itinerary for VPLZCNT?
-defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", SSE_INTALU_ITINS_P, HasCDI>;
+defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
+ SSE_INTALU_ITINS_P, HasCDI>;
// FIXME: Is there a better scheduler itinerary for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
SSE_INTALU_ITINS_P, HasCDI>;
// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
-let Predicates = [HasCDI, NoVLX] in {
- def : Pat<(v4i64 (ctlz VR256X:$src)),
- (EXTRACT_SUBREG
- (VPLZCNTQZrr
- (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
- sub_ymm)>;
- def : Pat<(v2i64 (ctlz VR128X:$src)),
- (EXTRACT_SUBREG
- (VPLZCNTQZrr
- (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
- sub_xmm)>;
-
- def : Pat<(v8i32 (ctlz VR256X:$src)),
- (EXTRACT_SUBREG
- (VPLZCNTDZrr
- (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
- sub_ymm)>;
- def : Pat<(v4i32 (ctlz VR128X:$src)),
- (EXTRACT_SUBREG
- (VPLZCNTDZrr
- (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
- sub_xmm)>;
-}
+defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
+defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//
-multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr,
- OpndItins itins, X86VectorVTInfo VTInfo> {
- let Predicates = [HasVPOPCNTDQ] in
- defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, itins, VTInfo>, EVEX_V512;
-}
-
-// Use 512bit version to implement 128/256 bit.
-multiclass avx512_unary_lowering<SDNode OpNode, AVX512VLVectorVTInfo _, Predicate prd> {
- let Predicates = [prd] in {
- def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
- (EXTRACT_SUBREG
- (!cast<Instruction>(NAME # "Zrr")
- (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
- _.info256.RC:$src1,
- _.info256.SubRegIdx)),
- _.info256.SubRegIdx)>;
-
- def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
- (EXTRACT_SUBREG
- (!cast<Instruction>(NAME # "Zrr")
- (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
- _.info128.RC:$src1,
- _.info128.SubRegIdx)),
- _.info128.SubRegIdx)>;
- }
-}
-
// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
-defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", SSE_INTALU_ITINS_P,
- v16i32_info>,
- avx512_unary_lowering<ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
+defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
+ SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
-defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", SSE_INTALU_ITINS_P,
- v8i64_info>,
- avx512_unary_lowering<ctpop, avx512vl_i64_info, HasVPOPCNTDQ>, VEX_W;
+defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
+defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
- avx512vl_i8_info, HasBITALG>,
- avx512_unary_lowering<ctpop, avx512vl_i8_info, HasBITALG>;
+ avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
- avx512vl_i16_info, HasBITALG>,
- avx512_unary_lowering<ctpop, avx512vl_i16_info, HasBITALG>, VEX_W;
+ avx512vl_i16_info, HasBITALG>, VEX_W;
+
+defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
+defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv2i64:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv4i32:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv8i16:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vzeroupper
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv16i8:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vzeroupper
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv4i64:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv4i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv8i32:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv8i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv16i16:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv16i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv32i8:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv32i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
;
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv2i64:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubq %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv2i64u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubq %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv2i64u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv4i32:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubd %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv4i32u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubd %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv4i32u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv8i16:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vzeroupper
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv8i16u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vzeroupper
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv8i16u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv16i8:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vzeroupper
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv16i8u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vzeroupper
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv16i8u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vmovq %rax, %xmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: foldv2i64:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: movl $8, %eax
+; AVX512VPOPCNTDQVL-NEXT: vmovq %rax, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: foldv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: movl $8, %eax
; AVX512VPOPCNTDQ-NEXT: vmovq %rax, %xmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: foldv2i64u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: movl $8, %eax
+; AVX512VPOPCNTDQVL-NEXT: vmovq %rax, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: foldv2i64u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: movl $8, %eax
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: foldv4i32:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: foldv4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: foldv4i32u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: foldv4i32u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: foldv8i16:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: foldv8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: foldv8i16u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: foldv8i16u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: foldv16i8:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: foldv16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: foldv16i8u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: foldv16i8u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
;
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv4i64:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv4i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv4i64u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv4i64u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv8i32:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubd %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv8i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv8i32u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubd %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv8i32u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv16i16:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv16i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv16i16u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv16i16u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv32i8:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv32i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
+; AVX512VPOPCNTDQVL-LABEL: testv32i8u:
+; AVX512VPOPCNTDQVL: # %bb.0:
+; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQVL-NEXT: retq
+;
; BITALG_NOVLX-LABEL: testv32i8u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1