From: David Zarzycki
Date: Wed, 28 Oct 2020 11:44:54 +0000 (-0400)
Subject: [x86 testing] NFC: remove a few needless vector popcnt tests
X-Git-Tag: llvmorg-13-init~7896
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=305d18a04b8c999162d130ca637d2adda59637e6;p=platform%2Fupstream%2Fllvm.git

[x86 testing] NFC: remove a few needless vector popcnt tests

The removed tests are handled by optimization passes before code gen and are
therefore just a distraction when making code gen changes that may (as a side
effect) reimplement earlier optimization work.

Specifically, the following tests were removed:

ult_0_v* -> false
ult_1_v* -> x == 0
ugt_0_v* -> x != 0
ult_{size-of-element-plus-one}_v* -> true
ugt_{size-of-element}_v* -> false
ult_{size-of-element}_v* -> x != mask
ugt_{size-of-element-minus-one}_v* -> x == mask
---
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll index 09a15f2..e7d3197 100644 --- a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll @@ -11,338 +11,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG -define <16 x i8> @ult_0_v16i8(<16 x i8> %0) { -; SSE-LABEL: ult_0_v16i8: -; SSE: # %bb.0: -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: retq -; -; AVX-LABEL: ult_0_v16i8: -; AVX: # %bb.0: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_0_v16i8: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_0_v16i8: -; BITALG: # %bb.0: -; BITALG-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) - %3 = icmp ult <16 x i8> %2, - %4 = sext <16 x i1> %3 to <16 x i8> - ret <16 x i8> %4 -} - -define <16 x i8> @ugt_0_v16i8(<16 x i8> %0) { -; SSE2-LABEL: ugt_0_v16i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_0_v16i8: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE3-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE3-NEXT: pxor %xmm0, %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_0_v16i8: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm1, %xmm1 -; SSSE3-NEXT: pcmpeqb %xmm3, %xmm1 -; SSSE3-NEXT: pcmpeqd %xmm0, %xmm0 -; SSSE3-NEXT: pxor %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_0_v16i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_0_v16i8: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_0_v16i8: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v16i8: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_0_v16i8: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_0_v16i8: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_0_v16i8: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) - %3 = icmp ugt <16 x i8> %2, - %4 = sext <16 x i1> %3 to <16 x i8> - ret <16 x i8> %4 -} - -define <16 x i8> @ult_1_v16i8(<16 x i8> %0) { -; SSE2-LABEL: ult_1_v16i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_1_v16i8: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_1_v16i8: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 -; 
SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_1_v16i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_1_v16i8: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_1_v16i8: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_1_v16i8: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_1_v16i8: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_1_v16i8: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; 
BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_1_v16i8: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) - %3 = icmp ult <16 x i8> %2, - %4 = sext <16 x i1> %3 to <16 x i8> - ret <16 x i8> %4 -} - define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { ; SSE-LABEL: ugt_1_v16i8: ; SSE: # %bb.0: @@ -1883,8 +1551,123 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) { ret <16 x i8> %4 } -define <16 x i8> @ugt_7_v16i8(<16 x i8> %0) { -; SSE2-LABEL: ugt_7_v16i8: +define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) { +; SSE-LABEL: ugt_1_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: paddw %xmm2, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: pcmpeqw %xmm0, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: ugt_1_v8i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: ugt_1_v8i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i16: +; AVX512VPOPCNTDQ: # %bb.0: +; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNTDQ-NEXT: vpaddw %xmm1, %xmm0, %xmm1 +; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VPOPCNTDQ-NEXT: vzeroupper +; AVX512VPOPCNTDQ-NEXT: retq +; +; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i16: +; AVX512VPOPCNTDQVL: # %bb.0: +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm1 +; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: retq +; +; BITALG_NOVLX-LABEL: ugt_1_v8i16: +; BITALG_NOVLX: # %bb.0: +; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vzeroupper +; BITALG_NOVLX-NEXT: retq +; +; BITALG-LABEL: ugt_1_v8i16: +; BITALG: # %bb.0: +; BITALG-NEXT: vpopcntw %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: retq + %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) + %3 = icmp ugt <8 x i16> %2, + %4 = sext <8 x i1> %3 to <8 x i16> + ret <8 x i16> %4 +} + +define <8 x i16> @ult_2_v8i16(<8 x i16> %0) { +; SSE-LABEL: ult_2_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE-NEXT: paddw %xmm0, %xmm1 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ult_2_v8i16: +; AVX: # %bb.0: 
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; BITALG_NOVLX-LABEL: ult_2_v8i16: +; BITALG_NOVLX: # %bb.0: +; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2] +; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; BITALG_NOVLX-NEXT: vzeroupper +; BITALG_NOVLX-NEXT: retq +; +; BITALG-LABEL: ult_2_v8i16: +; BITALG: # %bb.0: +; BITALG-NEXT: vpopcntw %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: retq + %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) + %3 = icmp ult <8 x i16> %2, + %4 = sext <8 x i1> %3 to <8 x i16> + ret <8 x i16> %4 +} + +define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_2_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -1900,11 +1683,14 @@ define <16 x i8> @ugt_7_v16i8(<16 x i8> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: psllw $8, %xmm0 +; SSE2-NEXT: paddb %xmm1, %xmm0 +; SSE2-NEXT: psrlw $8, %xmm0 +; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_7_v16i8: +; SSE3-LABEL: ugt_2_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -1920,11 +1706,14 @@ define <16 x i8> @ugt_7_v16i8(<16 x i8> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 ; SSE3-NEXT: movdqa %xmm1, %xmm0 +; SSE3-NEXT: psllw $8, %xmm0 +; SSE3-NEXT: paddb %xmm1, %xmm0 +; SSE3-NEXT: psrlw $8, %xmm0 +; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_7_v16i8: +; SSSE3-LABEL: ugt_2_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -1936,12 +1725,14 @@ define <16 x i8> @ugt_7_v16i8(<16 x i8> %0) { ; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufb %xmm0, %xmm3 ; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSSE3-NEXT: pmaxub %xmm3, %xmm0 -; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 +; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: psllw $8, %xmm0 +; SSSE3-NEXT: paddb %xmm3, %xmm0 +; SSSE3-NEXT: psrlw $8, %xmm0 +; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_7_v16i8: +; SSE41-LABEL: ugt_2_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -1953,12 +1744,14 @@ define <16 x i8> @ugt_7_v16i8(<16 x i8> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSE41-NEXT: pmaxub %xmm3, %xmm0 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: psllw $8, %xmm0 +; SSE41-NEXT: paddb %xmm3, %xmm0 +; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_7_v16i8: +; AVX1-LABEL: ugt_2_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -1968,11 +1761,13 @@ define <16 x i8> @ugt_7_v16i8(<16 x i8> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_7_v16i8: +; AVX2-LABEL: ugt_2_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -1982,50 +1777,52 @@ define <16 x i8> @ugt_7_v16i8(<16 x i8> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v16i8: +; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_7_v16i8: +; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_7_v16i8: +; BITALG_NOVLX-LABEL: ugt_2_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: 
vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_7_v16i8: +; BITALG-LABEL: ugt_2_v8i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %xmm0 +; BITALG-NEXT: vpopcntw %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq - %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) - %3 = icmp ugt <16 x i8> %2, - %4 = sext <16 x i1> %3 to <16 x i8> - ret <16 x i8> %4 + %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) + %3 = icmp ugt <8 x i16> %2, + %4 = sext <8 x i1> %3 to <8 x i16> + ret <8 x i16> %4 } -define <16 x i8> @ult_8_v16i8(<16 x i8> %0) { -; SSE2-LABEL: ult_8_v16i8: +define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_3_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -2041,11 +1838,15 @@ define <16 x i8> @ult_8_v16i8(<16 x i8> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psllw $8, %xmm2 +; SSE2-NEXT: paddb %xmm1, %xmm2 +; SSE2-NEXT: psrlw $8, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] +; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_8_v16i8: +; SSE3-LABEL: ult_3_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -2061,11 +1862,15 @@ define <16 x i8> @ult_8_v16i8(<16 x i8> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSE3-NEXT: pcmpgtb %xmm1, %xmm0 +; SSE3-NEXT: movdqa %xmm1, %xmm2 +; SSE3-NEXT: psllw $8, %xmm2 +; SSE3-NEXT: paddb %xmm1, %xmm2 +; SSE3-NEXT: psrlw $8, %xmm2 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] +; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_8_v16i8: +; SSSE3-LABEL: ult_3_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -2077,12 +1882,15 @@ define <16 x i8> @ult_8_v16i8(<16 x i8> %0) { ; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufb %xmm0, %xmm3 ; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; SSSE3-NEXT: pminub %xmm3, %xmm0 -; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 +; SSSE3-NEXT: movdqa %xmm3, %xmm1 +; SSSE3-NEXT: psllw $8, %xmm1 +; SSSE3-NEXT: paddb %xmm3, %xmm1 +; SSSE3-NEXT: psrlw $8, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] +; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_8_v16i8: +; SSE41-LABEL: ult_3_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -2094,12 +1902,15 @@ define <16 x i8> @ult_8_v16i8(<16 x i8> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; SSE41-NEXT: pminub %xmm3, %xmm0 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm1 +; SSE41-NEXT: psllw $8, 
%xmm1 +; SSE41-NEXT: paddb %xmm3, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] +; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_8_v16i8: +; AVX1-LABEL: ult_3_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2109,11 +1920,14 @@ define <16 x i8> @ult_8_v16i8(<16 x i8> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_8_v16i8: +; AVX2-LABEL: ult_3_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2123,53 +1937,56 @@ define <16 x i8> @ult_8_v16i8(<16 x i8> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v16i8: +; AVX512VPOPCNTDQ-LABEL: ult_3_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_8_v16i8: +; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; 
AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_8_v16i8: +; BITALG_NOVLX-LABEL: ult_3_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_8_v16i8: +; BITALG-LABEL: ult_3_v8i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %xmm0 +; BITALG-NEXT: vpopcntw %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq - %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) - %3 = icmp ult <16 x i8> %2, - %4 = sext <16 x i1> %3 to <16 x i8> - ret <16 x i8> %4 + %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) + %3 = icmp ult <8 x i16> %2, + %4 = sext <8 x i1> %3 to <8 x i16> + ret <8 x i16> %4 } -define <16 x i8> @ugt_8_v16i8(<16 x i8> %0) { -; SSE2-LABEL: ugt_8_v16i8: +define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_3_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -2185,11 +2002,14 @@ define <16 x i8> @ugt_8_v16i8(<16 x i8> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: psllw $8, %xmm0 +; SSE2-NEXT: paddb %xmm1, %xmm0 +; SSE2-NEXT: psrlw $8, %xmm0 +; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_8_v16i8: +; SSE3-LABEL: ugt_3_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -2205,11 +2025,14 @@ define <16 x i8> @ugt_8_v16i8(<16 x i8> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pcmpgtb {{.*}}(%rip), %xmm1 ; SSE3-NEXT: movdqa %xmm1, %xmm0 +; SSE3-NEXT: psllw $8, %xmm0 +; SSE3-NEXT: paddb %xmm1, %xmm0 +; SSE3-NEXT: psrlw $8, %xmm0 +; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_8_v16i8: +; SSSE3-LABEL: ugt_3_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -2221,12 +2044,14 @@ define <16 x i8> @ugt_8_v16i8(<16 x i8> %0) { ; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufb %xmm0, %xmm3 ; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; SSSE3-NEXT: pmaxub %xmm3, %xmm0 -; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 +; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: psllw $8, %xmm0 +; SSSE3-NEXT: paddb %xmm3, %xmm0 +; SSSE3-NEXT: psrlw $8, %xmm0 +; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_8_v16i8: +; SSE41-LABEL: 
ugt_3_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -2238,12 +2063,14 @@ define <16 x i8> @ugt_8_v16i8(<16 x i8> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; SSE41-NEXT: pmaxub %xmm3, %xmm0 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: psllw $8, %xmm0 +; SSE41-NEXT: paddb %xmm3, %xmm0 +; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_8_v16i8: +; AVX1-LABEL: ugt_3_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2253,11 +2080,13 @@ define <16 x i8> @ugt_8_v16i8(<16 x i8> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_8_v16i8: +; AVX2-LABEL: ugt_3_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2267,50 +2096,52 @@ define <16 x i8> @ugt_8_v16i8(<16 x i8> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v16i8: +; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_8_v16i8: +; AVX512VPOPCNTDQVL-LABEL: ugt_3_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; 
AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_8_v16i8: +; BITALG_NOVLX-LABEL: ugt_3_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_8_v16i8: +; BITALG-LABEL: ugt_3_v8i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %xmm0 +; BITALG-NEXT: vpopcntw %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq - %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) - %3 = icmp ugt <16 x i8> %2, - %4 = sext <16 x i1> %3 to <16 x i8> - ret <16 x i8> %4 + %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) + %3 = icmp ugt <8 x i16> %2, + %4 = sext <8 x i1> %3 to <8 x i16> + ret <8 x i16> %4 } -define <16 x i8> @ult_9_v16i8(<16 x i8> %0) { -; SSE2-LABEL: ult_9_v16i8: +define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_4_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -2326,11 +2157,15 @@ define <16 x i8> @ult_9_v16i8(<16 x i8> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; SSE2-NEXT: pcmpgtb %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psllw $8, %xmm2 +; SSE2-NEXT: paddb %xmm1, %xmm2 +; SSE2-NEXT: psrlw $8, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] +; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_9_v16i8: +; SSE3-LABEL: ult_4_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -2346,11 +2181,15 @@ define <16 x i8> @ult_9_v16i8(<16 x i8> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; SSE3-NEXT: pcmpgtb %xmm1, %xmm0 +; SSE3-NEXT: movdqa %xmm1, %xmm2 +; SSE3-NEXT: psllw $8, %xmm2 +; SSE3-NEXT: paddb %xmm1, %xmm2 +; SSE3-NEXT: psrlw $8, %xmm2 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] +; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_9_v16i8: +; SSSE3-LABEL: ult_4_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -2362,12 +2201,15 @@ define <16 x i8> @ult_9_v16i8(<16 x i8> %0) { ; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufb %xmm0, %xmm3 ; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSSE3-NEXT: pminub %xmm3, %xmm0 -; SSSE3-NEXT: pcmpeqb %xmm3, %xmm0 +; SSSE3-NEXT: movdqa %xmm3, %xmm1 +; 
SSSE3-NEXT: psllw $8, %xmm1 +; SSSE3-NEXT: paddb %xmm3, %xmm1 +; SSSE3-NEXT: psrlw $8, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] +; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_9_v16i8: +; SSE41-LABEL: ult_4_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -2379,12 +2221,15 @@ define <16 x i8> @ult_9_v16i8(<16 x i8> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; SSE41-NEXT: pminub %xmm3, %xmm0 -; SSE41-NEXT: pcmpeqb %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm3, %xmm1 +; SSE41-NEXT: psllw $8, %xmm1 +; SSE41-NEXT: paddb %xmm3, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] +; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_9_v16i8: +; AVX1-LABEL: ult_4_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2394,11 +2239,14 @@ define <16 x i8> @ult_9_v16i8(<16 x i8> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_9_v16i8: +; AVX2-LABEL: ult_4_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2408,79 +2256,56 @@ define <16 x i8> @ult_9_v16i8(<16 x i8> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm1 -; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v16i8: +; AVX512VPOPCNTDQ-LABEL: ult_4_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; 
AVX512VPOPCNTDQVL-LABEL: ult_9_v16i8: +; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_9_v16i8: +; BITALG_NOVLX-LABEL: ult_4_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_9_v16i8: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) - %3 = icmp ult <16 x i8> %2, - %4 = sext <16 x i1> %3 to <16 x i8> - ret <16 x i8> %4 -} - -define <8 x i16> @ult_0_v8i16(<8 x i16> %0) { -; SSE-LABEL: ult_0_v8i16: -; SSE: # %bb.0: -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: retq -; -; AVX-LABEL: ult_0_v8i16: -; AVX: # %bb.0: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_0_v8i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_0_v8i16: +; BITALG-LABEL: ult_4_v8i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vpopcntw %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_0_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_0_v8i16: +define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_4_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -2500,13 +2325,10 @@ define <8 x i16> @ugt_0_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm0 ; SSE2-NEXT: paddb %xmm1, %xmm0 ; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: 
ugt_0_v8i16: +; SSE3-LABEL: ugt_4_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -2526,13 +2348,10 @@ define <8 x i16> @ugt_0_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm0 ; SSE3-NEXT: paddb %xmm1, %xmm0 ; SSE3-NEXT: psrlw $8, %xmm0 -; SSE3-NEXT: pxor %xmm1, %xmm1 -; SSE3-NEXT: pcmpeqw %xmm1, %xmm0 -; SSE3-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE3-NEXT: pxor %xmm1, %xmm0 +; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_0_v8i16: +; SSSE3-LABEL: ugt_4_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -2548,13 +2367,10 @@ define <8 x i16> @ugt_0_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm0 ; SSSE3-NEXT: paddb %xmm3, %xmm0 ; SSSE3-NEXT: psrlw $8, %xmm0 -; SSSE3-NEXT: pxor %xmm1, %xmm1 -; SSSE3-NEXT: pcmpeqw %xmm0, %xmm1 -; SSSE3-NEXT: pcmpeqd %xmm0, %xmm0 -; SSSE3-NEXT: pxor %xmm1, %xmm0 +; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_0_v8i16: +; SSE41-LABEL: ugt_4_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -2570,13 +2386,10 @@ define <8 x i16> @ugt_0_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm0 ; SSE41-NEXT: paddb %xmm3, %xmm0 ; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pcmpeqw %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_0_v8i16: +; AVX1-LABEL: ugt_4_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2589,13 +2402,10 @@ define <8 x i16> @ugt_0_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_0_v8i16: +; AVX2-LABEL: ugt_4_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2608,61 +2418,49 @@ define <8 x i16> @ugt_0_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; 
AVX512VPOPCNTDQVL-LABEL: ugt_0_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_0_v8i16: +; BITALG_NOVLX-LABEL: ugt_4_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_0_v8i16: +; BITALG-LABEL: ugt_4_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_1_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_1_v8i16: +define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_5_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -2678,15 +2476,15 @@ define <8 x i16> @ult_1_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psllw $8, %xmm0 -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psllw $8, %xmm2 +; SSE2-NEXT: paddb %xmm1, %xmm2 +; SSE2-NEXT: psrlw $8, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] +; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_1_v8i16: +; SSE3-LABEL: ult_5_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -2702,15 +2500,15 @@ define <8 x i16> @ult_1_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: psllw $8, %xmm0 -; SSE3-NEXT: paddb %xmm1, %xmm0 -; SSE3-NEXT: psrlw $8, %xmm0 -; SSE3-NEXT: pxor %xmm1, %xmm1 -; SSE3-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE3-NEXT: movdqa %xmm1, %xmm2 +; SSE3-NEXT: psllw $8, %xmm2 +; SSE3-NEXT: paddb %xmm1, %xmm2 +; SSE3-NEXT: psrlw $8, %xmm2 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] +; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_1_v8i16: +; SSSE3-LABEL: ult_5_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -2726,11 +2524,11 @@ define <8 x i16> 
@ult_1_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: pcmpeqw %xmm1, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] +; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_1_v8i16: +; SSE41-LABEL: ult_5_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -2746,11 +2544,11 @@ define <8 x i16> @ult_1_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] +; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_1_v8i16: +; AVX1-LABEL: ult_5_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2763,11 +2561,11 @@ define <8 x i16> @ult_1_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_1_v8i16: +; AVX2-LABEL: ult_5_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2780,168 +2578,53 @@ define <8 x i16> @ult_1_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_1_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_5_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_1_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_1_v8i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; 
BITALG_NOVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_1_v8i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 -} - -define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) { -; SSE-LABEL: ugt_1_v8i16: -; SSE: # %bb.0: -; SSE-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: paddw %xmm2, %xmm1 -; SSE-NEXT: pand %xmm0, %xmm1 -; SSE-NEXT: pxor %xmm0, %xmm0 -; SSE-NEXT: pcmpeqw %xmm0, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm1 -; SSE-NEXT: movdqa %xmm1, %xmm0 -; SSE-NEXT: retq -; -; AVX1-LABEL: ugt_1_v8i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_1_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpaddw %xmm1, %xmm0, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_1_v8i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_1_v8i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 -} - -define <8 x i16> @ult_2_v8i16(<8 x i16> %0) { -; SSE-LABEL: ult_2_v8i16: -; SSE: # %bb.0: -; SSE-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE-NEXT: paddw %xmm0, %xmm1 -; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpeqw %xmm1, %xmm0 -; SSE-NEXT: retq -; -; AVX-LABEL: ult_2_v8i16: -; AVX: # %bb.0: -; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; 
AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_2_v8i16: +; BITALG_NOVLX-LABEL: ult_5_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_2_v8i16: +; BITALG-LABEL: ult_5_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_2_v8i16: +define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_5_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -2964,7 +2647,7 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_2_v8i16: +; SSE3-LABEL: ugt_5_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -2987,7 +2670,7 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_2_v8i16: +; SSSE3-LABEL: ugt_5_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -3006,7 +2689,7 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_2_v8i16: +; SSE41-LABEL: ugt_5_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -3025,7 +2708,7 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_2_v8i16: +; AVX1-LABEL: ugt_5_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3041,7 +2724,7 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_2_v8i16: +; AVX2-LABEL: ugt_5_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3057,7 +2740,7 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -3066,7 +2749,7 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; 
AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -3075,7 +2758,7 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_2_v8i16: +; BITALG_NOVLX-LABEL: ugt_5_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -3083,20 +2766,20 @@ define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_2_v8i16: +; BITALG-LABEL: ugt_5_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_3_v8i16: +define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_6_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -3116,11 +2799,11 @@ define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_3_v8i16: +; SSE3-LABEL: ult_6_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -3140,11 +2823,11 @@ define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_3_v8i16: +; SSSE3-LABEL: ult_6_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -3160,11 +2843,11 @@ define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_3_v8i16: +; SSE41-LABEL: ult_6_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -3180,11 +2863,11 @@ define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_3_v8i16: +; AVX1-LABEL: ult_6_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3197,11 +2880,11 @@ define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: 
ult_3_v8i16: +; AVX2-LABEL: ult_6_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3214,53 +2897,53 @@ define <8 x i16> @ult_3_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_3_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_6_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_3_v8i16: +; BITALG_NOVLX-LABEL: ult_6_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_3_v8i16: +; BITALG-LABEL: ult_6_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_3_v8i16: +define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_6_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -3283,7 +2966,7 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_3_v8i16: +; SSE3-LABEL: ugt_6_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -3306,7 +2989,7 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_3_v8i16: +; SSSE3-LABEL: ugt_6_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -3325,7 +3008,7 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; 
-; SSE41-LABEL: ugt_3_v8i16: +; SSE41-LABEL: ugt_6_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -3344,7 +3027,7 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_3_v8i16: +; AVX1-LABEL: ugt_6_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3360,7 +3043,7 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_3_v8i16: +; AVX2-LABEL: ugt_6_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3376,7 +3059,7 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -3385,7 +3068,7 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_3_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -3394,7 +3077,7 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_3_v8i16: +; BITALG_NOVLX-LABEL: ugt_6_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -3402,20 +3085,20 @@ define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_3_v8i16: +; BITALG-LABEL: ugt_6_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_4_v8i16: +define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_7_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -3435,11 +3118,11 @@ define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_4_v8i16: +; SSE3-LABEL: ult_7_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -3459,11 +3142,11 @@ define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] +; SSE3-NEXT: movdqa {{.*#+}} 
xmm0 = [7,7,7,7,7,7,7,7] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_4_v8i16: +; SSSE3-LABEL: ult_7_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -3479,11 +3162,11 @@ define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_4_v8i16: +; SSE41-LABEL: ult_7_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -3499,11 +3182,11 @@ define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_4_v8i16: +; AVX1-LABEL: ult_7_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3516,11 +3199,11 @@ define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_4_v8i16: +; AVX2-LABEL: ult_7_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3533,53 +3216,53 @@ define <8 x i16> @ult_4_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_4_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_7_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_4_v8i16: +; BITALG_NOVLX-LABEL: ult_7_v8i16: ; 
BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_4_v8i16: +; BITALG-LABEL: ult_7_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_4_v8i16: +define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_7_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -3602,7 +3285,7 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_4_v8i16: +; SSE3-LABEL: ugt_7_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -3625,7 +3308,7 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_4_v8i16: +; SSSE3-LABEL: ugt_7_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -3644,7 +3327,7 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_4_v8i16: +; SSE41-LABEL: ugt_7_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -3663,7 +3346,7 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_4_v8i16: +; AVX1-LABEL: ugt_7_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3679,7 +3362,7 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_4_v8i16: +; AVX2-LABEL: ugt_7_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3695,7 +3378,7 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -3704,7 +3387,7 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -3713,7 +3396,7 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; 
AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_4_v8i16: +; BITALG_NOVLX-LABEL: ugt_7_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -3721,20 +3404,20 @@ define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_4_v8i16: +; BITALG-LABEL: ugt_7_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_5_v8i16: +define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_8_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -3754,11 +3437,11 @@ define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_5_v8i16: +; SSE3-LABEL: ult_8_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -3778,11 +3461,11 @@ define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_5_v8i16: +; SSSE3-LABEL: ult_8_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -3798,11 +3481,11 @@ define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_5_v8i16: +; SSE41-LABEL: ult_8_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -3818,11 +3501,11 @@ define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_5_v8i16: +; AVX1-LABEL: ult_8_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3835,11 +3518,11 @@ define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_5_v8i16: +; AVX2-LABEL: ult_8_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3852,53 +3535,53 @@ define <8 x i16> @ult_5_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_5_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_8_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_5_v8i16: +; BITALG_NOVLX-LABEL: ult_8_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_5_v8i16: +; BITALG-LABEL: ult_8_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_5_v8i16: +define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_8_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -3921,7 +3604,7 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_5_v8i16: +; SSE3-LABEL: ugt_8_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -3944,7 +3627,7 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_5_v8i16: +; SSSE3-LABEL: ugt_8_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -3963,7 +3646,7 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_5_v8i16: +; SSE41-LABEL: ugt_8_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: 
movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -3982,7 +3665,7 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_5_v8i16: +; AVX1-LABEL: ugt_8_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3998,7 +3681,7 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_5_v8i16: +; AVX2-LABEL: ugt_8_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4014,7 +3697,7 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -4023,7 +3706,7 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -4032,7 +3715,7 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_5_v8i16: +; BITALG_NOVLX-LABEL: ugt_8_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -4040,20 +3723,20 @@ define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_5_v8i16: +; BITALG-LABEL: ugt_8_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_6_v8i16: +define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_9_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -4073,11 +3756,11 @@ define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_6_v8i16: +; SSE3-LABEL: ult_9_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -4097,11 +3780,11 @@ define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; 
SSSE3-LABEL: ult_6_v8i16: +; SSSE3-LABEL: ult_9_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -4117,11 +3800,11 @@ define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_6_v8i16: +; SSE41-LABEL: ult_9_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -4137,11 +3820,11 @@ define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_6_v8i16: +; AVX1-LABEL: ult_9_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4154,11 +3837,11 @@ define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_6_v8i16: +; AVX2-LABEL: ult_9_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4171,53 +3854,53 @@ define <8 x i16> @ult_6_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_6_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_9_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_6_v8i16: +; BITALG_NOVLX-LABEL: ult_9_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def 
$zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_6_v8i16: +; BITALG-LABEL: ult_9_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_6_v8i16: +define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_9_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -4240,7 +3923,7 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_6_v8i16: +; SSE3-LABEL: ugt_9_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -4263,7 +3946,7 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_6_v8i16: +; SSSE3-LABEL: ugt_9_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -4282,7 +3965,7 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_6_v8i16: +; SSE41-LABEL: ugt_9_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -4301,7 +3984,7 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_6_v8i16: +; AVX1-LABEL: ugt_9_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4317,7 +4000,7 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_6_v8i16: +; AVX2-LABEL: ugt_9_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4333,7 +4016,7 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -4342,7 +4025,7 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -4351,7 +4034,7 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: 
ugt_6_v8i16: +; BITALG_NOVLX-LABEL: ugt_9_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -4359,20 +4042,20 @@ define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_6_v8i16: +; BITALG-LABEL: ugt_9_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_7_v8i16: +define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_10_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -4392,11 +4075,11 @@ define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_7_v8i16: +; SSE3-LABEL: ult_10_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -4416,11 +4099,11 @@ define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_7_v8i16: +; SSSE3-LABEL: ult_10_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -4436,11 +4119,11 @@ define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_7_v8i16: +; SSE41-LABEL: ult_10_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -4456,11 +4139,11 @@ define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_7_v8i16: +; AVX1-LABEL: ult_10_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4473,11 +4156,11 @@ define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_7_v8i16: +; AVX2-LABEL: ult_10_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand 
%xmm1, %xmm0, %xmm2 @@ -4490,53 +4173,53 @@ define <8 x i16> @ult_7_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_7_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_10_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_7_v8i16: +; BITALG_NOVLX-LABEL: ult_10_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_7_v8i16: +; BITALG-LABEL: ult_10_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_7_v8i16: +define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_10_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -4559,7 +4242,7 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_7_v8i16: +; SSE3-LABEL: ugt_10_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -4582,7 +4265,7 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_7_v8i16: +; SSSE3-LABEL: ugt_10_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -4601,7 +4284,7 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_7_v8i16: +; SSE41-LABEL: ugt_10_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -4620,7 +4303,7 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_7_v8i16: +; AVX1-LABEL: ugt_10_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4636,7 +4319,7 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_7_v8i16: +; AVX2-LABEL: ugt_10_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4652,7 +4335,7 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -4661,7 +4344,7 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -4670,7 +4353,7 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_7_v8i16: +; BITALG_NOVLX-LABEL: ugt_10_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -4678,20 +4361,20 @@ define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_7_v8i16: +; BITALG-LABEL: ugt_10_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_8_v8i16: +define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_11_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -4711,11 +4394,11 @@ define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_8_v8i16: +; SSE3-LABEL: ult_11_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -4735,11 +4418,11 @@ define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; 
SSSE3-LABEL: ult_8_v8i16: +; SSSE3-LABEL: ult_11_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -4755,11 +4438,11 @@ define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_8_v8i16: +; SSE41-LABEL: ult_11_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -4775,11 +4458,11 @@ define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_8_v8i16: +; AVX1-LABEL: ult_11_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4792,11 +4475,11 @@ define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_8_v8i16: +; AVX2-LABEL: ult_11_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4809,53 +4492,53 @@ define <8 x i16> @ult_8_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_11_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_8_v8i16: +; BITALG_NOVLX-LABEL: ult_11_v8i16: ; BITALG_NOVLX: # %bb.0: ; 
BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_8_v8i16: +; BITALG-LABEL: ult_11_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_8_v8i16: +define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_11_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -4878,7 +4561,7 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_8_v8i16: +; SSE3-LABEL: ugt_11_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -4901,7 +4584,7 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_8_v8i16: +; SSSE3-LABEL: ugt_11_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -4920,7 +4603,7 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_8_v8i16: +; SSE41-LABEL: ugt_11_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -4939,7 +4622,7 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_8_v8i16: +; AVX1-LABEL: ugt_11_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4955,7 +4638,7 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_8_v8i16: +; AVX2-LABEL: ugt_11_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -4971,7 +4654,7 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -4980,7 +4663,7 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -4989,7 +4672,7 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: 
vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_8_v8i16: +; BITALG_NOVLX-LABEL: ugt_11_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -4997,20 +4680,20 @@ define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_8_v8i16: +; BITALG-LABEL: ugt_11_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_9_v8i16: +define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_12_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -5030,11 +4713,11 @@ define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_9_v8i16: +; SSE3-LABEL: ult_12_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -5054,11 +4737,11 @@ define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_9_v8i16: +; SSSE3-LABEL: ult_12_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -5074,11 +4757,11 @@ define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_9_v8i16: +; SSE41-LABEL: ult_12_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -5094,11 +4777,11 @@ define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_9_v8i16: +; AVX1-LABEL: ult_12_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5111,11 +4794,11 @@ define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_9_v8i16: +; AVX2-LABEL: ult_12_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5128,53 +4811,53 @@ define <8 x i16> @ult_9_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_12_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_9_v8i16: +; BITALG_NOVLX-LABEL: ult_12_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_9_v8i16: +; BITALG-LABEL: ult_12_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_9_v8i16: +define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_12_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -5197,7 +4880,7 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_9_v8i16: +; SSE3-LABEL: ugt_12_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -5220,7 +4903,7 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_9_v8i16: +; SSSE3-LABEL: ugt_12_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -5239,7 +4922,7 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_9_v8i16: +; SSE41-LABEL: 
ugt_12_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -5258,7 +4941,7 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_9_v8i16: +; AVX1-LABEL: ugt_12_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5274,7 +4957,7 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_9_v8i16: +; AVX2-LABEL: ugt_12_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5290,7 +4973,7 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -5299,7 +4982,7 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -5308,7 +4991,7 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_9_v8i16: +; BITALG_NOVLX-LABEL: ugt_12_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -5316,20 +4999,20 @@ define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_9_v8i16: +; BITALG-LABEL: ugt_12_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_10_v8i16: +define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_13_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -5349,11 +5032,11 @@ define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_10_v8i16: +; SSE3-LABEL: ult_13_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -5373,11 +5056,11 @@ define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = 
[13,13,13,13,13,13,13,13] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_10_v8i16: +; SSSE3-LABEL: ult_13_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -5393,11 +5076,11 @@ define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_10_v8i16: +; SSE41-LABEL: ult_13_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -5413,11 +5096,11 @@ define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_10_v8i16: +; AVX1-LABEL: ult_13_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5430,11 +5113,11 @@ define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_10_v8i16: +; AVX2-LABEL: ult_13_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5447,53 +5130,53 @@ define <8 x i16> @ult_10_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_10_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_13_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: 
vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_10_v8i16: +; BITALG_NOVLX-LABEL: ult_13_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_10_v8i16: +; BITALG-LABEL: ult_13_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_10_v8i16: +define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_13_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -5516,7 +5199,7 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_10_v8i16: +; SSE3-LABEL: ugt_13_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -5539,7 +5222,7 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_10_v8i16: +; SSSE3-LABEL: ugt_13_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -5558,7 +5241,7 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_10_v8i16: +; SSE41-LABEL: ugt_13_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -5577,7 +5260,7 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_10_v8i16: +; AVX1-LABEL: ugt_13_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5593,7 +5276,7 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_10_v8i16: +; AVX2-LABEL: ugt_13_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5609,7 +5292,7 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -5618,7 +5301,7 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -5627,7 +5310,7 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_10_v8i16: +; BITALG_NOVLX-LABEL: ugt_13_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -5635,20 +5318,20 @@ define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_10_v8i16: +; BITALG-LABEL: ugt_13_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_11_v8i16: +define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_14_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -5668,11 +5351,11 @@ define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_11_v8i16: +; SSE3-LABEL: ult_14_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -5692,11 +5375,11 @@ define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_11_v8i16: +; SSSE3-LABEL: ult_14_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -5712,11 +5395,11 @@ define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_11_v8i16: +; SSE41-LABEL: ult_14_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -5732,11 +5415,11 @@ define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_11_v8i16: +; AVX1-LABEL: ult_14_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5749,11 +5432,11 @@ define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_11_v8i16: +; AVX2-LABEL: ult_14_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5766,53 +5449,53 @@ define <8 x i16> @ult_11_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_11_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_14_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_11_v8i16: +; BITALG_NOVLX-LABEL: ult_14_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_11_v8i16: +; BITALG-LABEL: ult_14_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_11_v8i16: +define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ugt_14_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -5835,7 +5518,7 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_11_v8i16: +; SSE3-LABEL: ugt_14_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -5858,7 +5541,7 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_11_v8i16: +; 
SSSE3-LABEL: ugt_14_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -5877,7 +5560,7 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_11_v8i16: +; SSE41-LABEL: ugt_14_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -5896,7 +5579,7 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_11_v8i16: +; AVX1-LABEL: ugt_14_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5912,7 +5595,7 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_11_v8i16: +; AVX2-LABEL: ugt_14_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5928,7 +5611,7 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -5937,7 +5620,7 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 @@ -5946,7 +5629,7 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_11_v8i16: +; BITALG_NOVLX-LABEL: ugt_14_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 @@ -5954,20 +5637,20 @@ define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_11_v8i16: +; BITALG-LABEL: ugt_14_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, + %3 = icmp ugt <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_12_v8i16: +define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { +; SSE2-LABEL: ult_15_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -5987,11 +5670,11 @@ define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psllw $8, %xmm2 ; SSE2-NEXT: paddb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_12_v8i16: +; SSE3-LABEL: 
ult_15_v8i16: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -6011,11 +5694,11 @@ define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psllw $8, %xmm2 ; SSE3-NEXT: paddb %xmm1, %xmm2 ; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_12_v8i16: +; SSSE3-LABEL: ult_15_v8i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -6031,11 +5714,11 @@ define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: psllw $8, %xmm1 ; SSSE3-NEXT: paddb %xmm3, %xmm1 ; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_12_v8i16: +; SSE41-LABEL: ult_15_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -6051,11 +5734,11 @@ define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { ; SSE41-NEXT: psllw $8, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_12_v8i16: +; AVX1-LABEL: ult_15_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6068,11 +5751,11 @@ define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_12_v8i16: +; AVX2-LABEL: ult_15_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6085,53 +5768,198 @@ define <8 x i16> @ult_12_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_12_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_15_v8i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_12_v8i16: +; BITALG_NOVLX-LABEL: ult_15_v8i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_12_v8i16: +; BITALG-LABEL: ult_15_v8i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %xmm0, %xmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, + %3 = icmp ult <8 x i16> %2, %4 = sext <8 x i1> %3 to <8 x i16> ret <8 x i16> %4 } -define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_12_v8i16: +define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) { +; SSE-LABEL: ugt_1_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: paddd %xmm2, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: ugt_1_v4i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: ugt_1_v4i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512VPOPCNTDQ-LABEL: ugt_1_v4i32: +; AVX512VPOPCNTDQ: # %bb.0: +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vzeroupper +; AVX512VPOPCNTDQ-NEXT: retq +; +; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i32: +; AVX512VPOPCNTDQVL: # %bb.0: +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512VPOPCNTDQVL-NEXT: retq +; +; BITALG_NOVLX-LABEL: ugt_1_v4i32: +; BITALG_NOVLX: # %bb.0: +; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; 
BITALG_NOVLX-NEXT: vzeroupper +; BITALG_NOVLX-NEXT: retq +; +; BITALG-LABEL: ugt_1_v4i32: +; BITALG: # %bb.0: +; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: retq + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ugt <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 +} + +define <4 x i32> @ult_2_v4i32(<4 x i32> %0) { +; SSE-LABEL: ult_2_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: ult_2_v4i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: ult_2_v4i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512VPOPCNTDQ-LABEL: ult_2_v4i32: +; AVX512VPOPCNTDQ: # %bb.0: +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vzeroupper +; AVX512VPOPCNTDQ-NEXT: retq +; +; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i32: +; AVX512VPOPCNTDQVL: # %bb.0: +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512VPOPCNTDQVL-NEXT: retq +; +; BITALG_NOVLX-LABEL: ult_2_v4i32: +; BITALG_NOVLX: # %bb.0: +; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: retq +; +; BITALG-LABEL: ult_2_v4i32: +; BITALG: # %bb.0: +; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: retq + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ult <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 +} + +define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_2_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -6147,14 +5975,18 @@ define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE2-NEXT: 
movdqa %xmm1, %xmm0 -; SSE2-NEXT: psllw $8, %xmm0 -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_12_v8i16: +; SSE3-LABEL: ugt_2_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -6170,33 +6002,41 @@ define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 +; SSE3-NEXT: movdqa %xmm1, %xmm2 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: psllw $8, %xmm0 -; SSE3-NEXT: paddb %xmm1, %xmm0 -; SSE3-NEXT: psrlw $8, %xmm0 -; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_12_v8i16: +; SSSE3-LABEL: ugt_2_v4i32: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSSE3-NEXT: movdqa %xmm0, %xmm3 +; SSSE3-NEXT: pand %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSSE3-NEXT: movdqa %xmm1, %xmm4 +; SSSE3-NEXT: pshufb %xmm3, %xmm4 ; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: psllw $8, %xmm0 -; SSSE3-NEXT: paddb %xmm3, %xmm0 -; SSSE3-NEXT: psrlw $8, %xmm0 -; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pshufb %xmm0, %xmm1 +; SSSE3-NEXT: paddb %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm0 +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: packuswb %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_12_v8i16: +; SSE41-LABEL: ugt_2_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -6208,14 +6048,16 @@ define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: psllw $8, %xmm0 -; SSE41-NEXT: paddb %xmm3, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE41-NEXT: psadbw %xmm1, %xmm3 +; SSE41-NEXT: psadbw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm3, %xmm0 +; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_12_v8i16: +; AVX1-LABEL: ugt_2_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6225,13 +6067,16 @@ define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_12_v8i16: +; AVX2-LABEL: ugt_2_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6241,52 +6086,69 @@ define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_2_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_12_v8i16: +; BITALG_NOVLX-LABEL: ugt_2_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_12_v8i16: +; BITALG-LABEL: ugt_2_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ugt <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 } -define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_13_v8i16: +define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_3_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -6302,15 +6164,18 @@ define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: psllw $8, %xmm2 -; SSE2-NEXT: paddb %xmm1, %xmm2 -; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] -; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_13_v8i16: +; SSE3-LABEL: ult_3_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -6326,15 +6191,18 @@ define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: psllw $8, %xmm2 -; SSE3-NEXT: paddb %xmm1, %xmm2 -; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] -; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] +; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_13_v8i16: +; SSSE3-LABEL: ult_3_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -6346,15 +6214,18 @@ define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufb %xmm0, %xmm3 ; SSSE3-NEXT: paddb %xmm4, %xmm3 +; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: psllw $8, %xmm1 -; SSSE3-NEXT: paddb %xmm3, %xmm1 -; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] -; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm3 +; SSSE3-NEXT: packuswb %xmm1, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_13_v8i16: +; SSE41-LABEL: ult_3_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -6366,15 +6237,17 @@ define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm1 -; SSE41-NEXT: psllw $8, %xmm1 -; SSE41-NEXT: paddb %xmm3, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13] -; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; SSE41-NEXT: psadbw %xmm0, %xmm3 +; SSE41-NEXT: psadbw %xmm0, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] +; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_13_v8i16: +; AVX1-LABEL: ult_3_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6384,14 +6257,17 @@ define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3] +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_13_v8i16: +; AVX2-LABEL: ult_3_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6401,56 +6277,69 @@ define <8 x i16> @ult_13_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] -; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; 
AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_13_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_3_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_13_v8i16: +; BITALG_NOVLX-LABEL: ult_3_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] -; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_13_v8i16: +; BITALG-LABEL: ult_3_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = 
tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ult <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 } -define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_13_v8i16: +define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_3_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -6466,14 +6355,18 @@ define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psllw $8, %xmm0 -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_13_v8i16: +; SSE3-LABEL: ugt_3_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -6489,33 +6382,41 @@ define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 +; SSE3-NEXT: movdqa %xmm1, %xmm2 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: psllw $8, %xmm0 -; SSE3-NEXT: paddb %xmm1, %xmm0 -; SSE3-NEXT: psrlw $8, %xmm0 -; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_13_v8i16: +; SSSE3-LABEL: ugt_3_v4i32: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSSE3-NEXT: movdqa %xmm0, %xmm3 +; SSSE3-NEXT: pand %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSSE3-NEXT: movdqa %xmm1, %xmm4 +; SSSE3-NEXT: pshufb %xmm3, %xmm4 ; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: psllw $8, %xmm0 -; SSSE3-NEXT: paddb %xmm3, %xmm0 -; SSSE3-NEXT: psrlw $8, %xmm0 -; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pshufb %xmm0, %xmm1 +; SSSE3-NEXT: paddb %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm0 +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: packuswb %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 +; SSSE3-NEXT: movdqa %xmm1, 
%xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_13_v8i16: +; SSE41-LABEL: ugt_3_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -6527,14 +6428,16 @@ define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: psllw $8, %xmm0 -; SSE41-NEXT: paddb %xmm3, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE41-NEXT: psadbw %xmm1, %xmm3 +; SSE41-NEXT: psadbw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm3, %xmm0 +; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_13_v8i16: +; AVX1-LABEL: ugt_3_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6544,13 +6447,16 @@ define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_13_v8i16: +; AVX2-LABEL: ugt_3_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6560,52 +6466,69 @@ define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_3_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i16: +; 
AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_13_v8i16: +; BITALG_NOVLX-LABEL: ugt_3_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_13_v8i16: +; BITALG-LABEL: ugt_3_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ugt <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 } -define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_14_v8i16: +define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_4_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -6621,15 +6544,18 @@ define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: psllw $8, %xmm2 -; SSE2-NEXT: paddb %xmm1, %xmm2 -; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] -; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq 
; -; SSE3-LABEL: ult_14_v8i16: +; SSE3-LABEL: ult_4_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -6645,15 +6571,18 @@ define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: psllw $8, %xmm2 -; SSE3-NEXT: paddb %xmm1, %xmm2 -; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] -; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] +; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_14_v8i16: +; SSSE3-LABEL: ult_4_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -6665,15 +6594,18 @@ define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufb %xmm0, %xmm3 ; SSSE3-NEXT: paddb %xmm4, %xmm3 +; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: psllw $8, %xmm1 -; SSSE3-NEXT: paddb %xmm3, %xmm1 -; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] -; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm3 +; SSSE3-NEXT: packuswb %xmm1, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_14_v8i16: +; SSE41-LABEL: ult_4_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -6685,15 +6617,17 @@ define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm1 -; SSE41-NEXT: psllw $8, %xmm1 -; SSE41-NEXT: paddb %xmm3, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14] -; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; SSE41-NEXT: psadbw %xmm0, %xmm3 +; SSE41-NEXT: psadbw %xmm0, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] +; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_14_v8i16: +; AVX1-LABEL: ult_4_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6703,14 +6637,17 @@ define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: 
vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4] +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_14_v8i16: +; AVX2-LABEL: ult_4_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6720,56 +6657,69 @@ define <8 x i16> @ult_14_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] -; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_14_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_4_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_14_v8i16: +; BITALG_NOVLX-LABEL: ult_4_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] -; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_14_v8i16: +; BITALG-LABEL: ult_4_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 -} + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ult <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 +} -define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_14_v8i16: +define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_4_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -6785,14 +6735,18 @@ define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psllw $8, %xmm0 -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_14_v8i16: +; SSE3-LABEL: ugt_4_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -6808,33 +6762,41 @@ define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 +; SSE3-NEXT: movdqa %xmm1, %xmm2 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: psllw $8, %xmm0 -; SSE3-NEXT: paddb %xmm1, %xmm0 -; SSE3-NEXT: psrlw $8, %xmm0 -; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_14_v8i16: +; SSSE3-LABEL: ugt_4_v4i32: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSSE3-NEXT: movdqa %xmm0, %xmm3 +; SSSE3-NEXT: pand %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSSE3-NEXT: movdqa %xmm1, %xmm4 +; SSSE3-NEXT: pshufb %xmm3, %xmm4 ; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: psllw $8, %xmm0 -; SSSE3-NEXT: paddb %xmm3, %xmm0 -; SSSE3-NEXT: psrlw $8, %xmm0 -; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pshufb %xmm0, %xmm1 +; SSSE3-NEXT: paddb %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm0 +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: packuswb %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_14_v8i16: +; SSE41-LABEL: ugt_4_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -6846,14 +6808,16 @@ define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: psllw $8, %xmm0 -; SSE41-NEXT: paddb %xmm3, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE41-NEXT: psadbw %xmm1, %xmm3 +; SSE41-NEXT: psadbw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm3, %xmm0 +; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_14_v8i16: +; AVX1-LABEL: ugt_4_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6863,13 +6827,16 @@ define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_14_v8i16: +; AVX2-LABEL: ugt_4_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6879,52 +6846,69 @@ define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = 
xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_4_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_14_v8i16: +; BITALG_NOVLX-LABEL: ugt_4_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_14_v8i16: +; BITALG-LABEL: ugt_4_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ugt <4 x i32> %2, + %4 = 
sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 } -define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_15_v8i16: +define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_5_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -6940,15 +6924,18 @@ define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: psllw $8, %xmm2 -; SSE2-NEXT: paddb %xmm1, %xmm2 -; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] -; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_15_v8i16: +; SSE3-LABEL: ult_5_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -6964,15 +6951,18 @@ define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: psllw $8, %xmm2 -; SSE3-NEXT: paddb %xmm1, %xmm2 -; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] -; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] +; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_15_v8i16: +; SSSE3-LABEL: ult_5_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -6984,15 +6974,18 @@ define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufb %xmm0, %xmm3 ; SSSE3-NEXT: paddb %xmm4, %xmm3 +; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: psllw $8, %xmm1 -; SSSE3-NEXT: paddb %xmm3, %xmm1 -; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] -; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm3 +; SSSE3-NEXT: packuswb %xmm1, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_15_v8i16: +; SSE41-LABEL: ult_5_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -7004,15 +6997,17 @@ define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm1 -; SSE41-NEXT: psllw $8, %xmm1 -; SSE41-NEXT: paddb %xmm3, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15] -; SSE41-NEXT: pcmpgtw %xmm1, 
%xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; SSE41-NEXT: psadbw %xmm0, %xmm3 +; SSE41-NEXT: psadbw %xmm0, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] +; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_15_v8i16: +; AVX1-LABEL: ult_5_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7022,14 +7017,17 @@ define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5] +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_15_v8i16: +; AVX2-LABEL: ult_5_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7039,56 +7037,69 @@ define <8 x i16> @ult_15_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_15_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_5_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, 
%xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_15_v8i16: +; BITALG_NOVLX-LABEL: ult_5_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] -; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_15_v8i16: +; BITALG-LABEL: ult_5_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ult <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 } -define <8 x i16> @ugt_15_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_15_v8i16: +define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_5_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -7104,14 +7115,18 @@ define <8 x i16> @ugt_15_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psllw $8, %xmm0 -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_15_v8i16: +; SSE3-LABEL: ugt_5_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -7127,33 +7142,41 @@ define <8 x i16> @ugt_15_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; 
SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 +; SSE3-NEXT: movdqa %xmm1, %xmm2 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: psllw $8, %xmm0 -; SSE3-NEXT: paddb %xmm1, %xmm0 -; SSE3-NEXT: psrlw $8, %xmm0 -; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_15_v8i16: +; SSSE3-LABEL: ugt_5_v4i32: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSSE3-NEXT: movdqa %xmm0, %xmm3 +; SSSE3-NEXT: pand %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSSE3-NEXT: movdqa %xmm1, %xmm4 +; SSSE3-NEXT: pshufb %xmm3, %xmm4 ; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: psllw $8, %xmm0 -; SSSE3-NEXT: paddb %xmm3, %xmm0 -; SSSE3-NEXT: psrlw $8, %xmm0 -; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pshufb %xmm0, %xmm1 +; SSSE3-NEXT: paddb %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm0 +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: packuswb %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_15_v8i16: +; SSE41-LABEL: ugt_5_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -7165,14 +7188,16 @@ define <8 x i16> @ugt_15_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: psllw $8, %xmm0 -; SSE41-NEXT: paddb %xmm3, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE41-NEXT: psadbw %xmm1, %xmm3 +; SSE41-NEXT: psadbw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm3, %xmm0 +; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_15_v8i16: +; AVX1-LABEL: ugt_5_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7182,13 +7207,16 @@ define <8 x i16> @ugt_15_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, 
%xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_15_v8i16: +; AVX2-LABEL: ugt_5_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7198,52 +7226,69 @@ define <8 x i16> @ugt_15_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_15_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_5_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_15_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_15_v8i16: +; BITALG_NOVLX-LABEL: ugt_5_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; 
BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_15_v8i16: +; BITALG-LABEL: ugt_5_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ugt <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 } -define <8 x i16> @ult_16_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_16_v8i16: +define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_6_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -7259,15 +7304,18 @@ define <8 x i16> @ult_16_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: psllw $8, %xmm2 -; SSE2-NEXT: paddb %xmm1, %xmm2 -; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16,16,16,16,16] -; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_16_v8i16: +; SSE3-LABEL: ult_6_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -7283,15 +7331,18 @@ define <8 x i16> @ult_16_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: psllw $8, %xmm2 -; SSE3-NEXT: paddb %xmm1, %xmm2 -; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16,16,16,16,16] -; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] +; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_16_v8i16: +; SSSE3-LABEL: ult_6_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -7303,15 +7354,18 @@ define <8 x i16> @ult_16_v8i16(<8 x i16> %0) { ; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufb %xmm0, %xmm3 ; SSSE3-NEXT: paddb %xmm4, %xmm3 +; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: psllw $8, %xmm1 -; SSSE3-NEXT: paddb %xmm3, %xmm1 -; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: 
movdqa {{.*#+}} xmm0 = [16,16,16,16,16,16,16,16] -; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm3 +; SSSE3-NEXT: packuswb %xmm1, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_16_v8i16: +; SSE41-LABEL: ult_6_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -7323,175 +7377,17 @@ define <8 x i16> @ult_16_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm1 -; SSE41-NEXT: psllw $8, %xmm1 -; SSE41-NEXT: paddb %xmm3, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_16_v8i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_16_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_16_v8i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_16_v8i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_16_v8i16: -; 
BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_16_v8i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 -} - -define <8 x i16> @ugt_16_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ugt_16_v8i16: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psllw $8, %xmm0 -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_16_v8i16: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: psllw $8, %xmm0 -; SSE3-NEXT: paddb %xmm1, %xmm0 -; SSE3-NEXT: psrlw $8, %xmm0 -; SSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_16_v8i16: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: psllw $8, %xmm0 -; SSSE3-NEXT: paddb %xmm3, %xmm0 -; SSSE3-NEXT: psrlw $8, %xmm0 -; SSSE3-NEXT: pcmpgtw {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_16_v8i16: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: psllw $8, %xmm0 -; SSE41-NEXT: paddb %xmm3, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pcmpgtw {{.*}}(%rip), 
%xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; SSE41-NEXT: psadbw %xmm0, %xmm3 +; SSE41-NEXT: psadbw %xmm0, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] +; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_16_v8i16: +; AVX1-LABEL: ult_6_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7501,13 +7397,17 @@ define <8 x i16> @ugt_16_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6] +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_16_v8i16: +; AVX2-LABEL: ult_6_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7517,52 +7417,69 @@ define <8 x i16> @ugt_16_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_16_v8i16: +; AVX512VPOPCNTDQ-LABEL: ult_6_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_16_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; 
AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_16_v8i16: +; BITALG_NOVLX-LABEL: ult_6_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_16_v8i16: +; BITALG-LABEL: ult_6_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ugt <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 + %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) + %3 = icmp ult <4 x i32> %2, + %4 = sext <4 x i1> %3 to <4 x i32> + ret <4 x i32> %4 } -define <8 x i16> @ult_17_v8i16(<8 x i16> %0) { -; SSE2-LABEL: ult_17_v8i16: +define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_6_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -7578,15 +7495,18 @@ define <8 x i16> @ult_17_v8i16(<8 x i16> %0) { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: psllw $8, %xmm2 -; SSE2-NEXT: paddb %xmm1, %xmm2 -; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17,17,17,17,17] -; SSE2-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_17_v8i16: +; SSE3-LABEL: ugt_6_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -7602,35 +7522,41 @@ define <8 x i16> @ult_17_v8i16(<8 x i16> %0) { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: paddb %xmm0, %xmm1 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: psllw $8, %xmm2 -; SSE3-NEXT: paddb %xmm1, %xmm2 -; SSE3-NEXT: psrlw $8, %xmm2 -; SSE3-NEXT: 
movdqa {{.*#+}} xmm0 = [17,17,17,17,17,17,17,17] -; SSE3-NEXT: pcmpgtw %xmm2, %xmm0 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 +; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_17_v8i16: +; SSSE3-LABEL: ugt_6_v4i32: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSSE3-NEXT: movdqa %xmm0, %xmm3 +; SSSE3-NEXT: pand %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSSE3-NEXT: movdqa %xmm1, %xmm4 +; SSSE3-NEXT: pshufb %xmm3, %xmm4 ; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: psllw $8, %xmm1 -; SSSE3-NEXT: paddb %xmm3, %xmm1 -; SSSE3-NEXT: psrlw $8, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17,17,17,17,17] -; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pshufb %xmm0, %xmm1 +; SSSE3-NEXT: paddb %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm0 +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: packuswb %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_17_v8i16: +; SSE41-LABEL: ugt_6_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -7642,15 +7568,16 @@ define <8 x i16> @ult_17_v8i16(<8 x i16> %0) { ; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: pshufb %xmm0, %xmm3 ; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm1 -; SSE41-NEXT: psllw $8, %xmm1 -; SSE41-NEXT: paddb %xmm3, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17,17,17,17,17] -; SSE41-NEXT: pcmpgtw %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE41-NEXT: psadbw %xmm1, %xmm3 +; SSE41-NEXT: psadbw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm3, %xmm0 +; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_17_v8i16: +; AVX1-LABEL: ugt_6_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7660,14 +7587,16 @@ define <8 x i16> @ult_17_v8i16(<8 x i16> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17] -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; 
AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_17_v8i16: +; AVX2-LABEL: ugt_6_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7677,82 +7606,69 @@ define <8 x i16> @ult_17_v8i16(<8 x i16> %0) { ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17] -; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_17_v8i16: +; AVX512VPOPCNTDQ-LABEL: ugt_6_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_17_v8i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vzeroupper +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_17_v8i16: +; BITALG_NOVLX-LABEL: ugt_6_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17] -; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_17_v8i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %xmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0) - %3 = icmp ult <8 x i16> %2, - %4 = sext <8 x i1> %3 to <8 x i16> - ret <8 x i16> %4 -} - -define <4 x i32> @ult_0_v4i32(<4 x i32> %0) { -; SSE-LABEL: ult_0_v4i32: -; SSE: # %bb.0: -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: retq -; -; AVX-LABEL: ult_0_v4i32: -; AVX: # %bb.0: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_0_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_0_v4i32: +; BITALG-LABEL: ugt_6_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_0_v4i32: +define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_7_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -7775,13 +7691,11 @@ define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_0_v4i32: +; SSE3-LABEL: ult_7_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -7804,13 +7718,11 @@ define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE3-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE3-NEXT: pxor %xmm0, %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] +; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_0_v4i32: +; SSSE3-LABEL: ult_7_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -7829,12 +7741,11 @@ define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: pcmpeqd 
%xmm0, %xmm3 -; SSSE3-NEXT: pcmpeqd %xmm0, %xmm0 -; SSSE3-NEXT: pxor %xmm3, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_0_v4i32: +; SSE41-LABEL: ult_7_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -7852,12 +7763,11 @@ define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] +; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_0_v4i32: +; AVX1-LABEL: ult_7_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7873,12 +7783,11 @@ define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7] +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_0_v4i32: +; AVX2-LABEL: ult_7_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7894,31 +7803,28 @@ define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_7_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_0_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_0_v4i32: +; BITALG_NOVLX-LABEL: ult_7_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7928,13 +7834,12 @@ define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { ; 
BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_0_v4i32: +; BITALG-LABEL: ult_7_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7943,17 +7848,18 @@ define <4 x i32> @ugt_0_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_1_v4i32: +define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_7_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -7976,11 +7882,11 @@ define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_1_v4i32: +; SSE3-LABEL: ugt_7_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -8003,11 +7909,11 @@ define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_1_v4i32: +; SSSE3-LABEL: ugt_7_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -8026,11 +7932,11 @@ define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm1 ; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 +; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_1_v4i32: +; SSE41-LABEL: ugt_7_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -8048,10 +7954,10 @@ define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm1, %xmm3 ; SSE41-NEXT: psadbw %xmm1, %xmm0 ; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_1_v4i32: +; AVX1-LABEL: ugt_7_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8067,10 +7973,10 @@ define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_1_v4i32: +; AVX2-LABEL: ugt_7_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8086,26 +7992,28 @@ define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_1_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_7_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_1_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_1_v4i32: +; BITALG_NOVLX-LABEL: ugt_7_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8115,11 +8023,12 @@ define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_1_v4i32: +; BITALG-LABEL: ugt_7_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8128,129 +8037,166 @@ define <4 x i32> @ult_1_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) { -; SSE-LABEL: ugt_1_v4i32: -; SSE: # %bb.0: -; SSE-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: 
paddd %xmm2, %xmm1 -; SSE-NEXT: pand %xmm0, %xmm1 -; SSE-NEXT: pxor %xmm0, %xmm0 -; SSE-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm1 -; SSE-NEXT: movdqa %xmm1, %xmm0 -; SSE-NEXT: retq +define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_8_v4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrlw $1, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: psubb %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: psrlw $2, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: paddb %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrlw $4, %xmm1 +; SSE2-NEXT: paddb %xmm0, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: psadbw %xmm0, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: psadbw %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: retq ; -; AVX1-LABEL: ugt_1_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_1_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_1_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq +; SSE3-LABEL: ult_8_v4i32: +; SSE3: # %bb.0: +; SSE3-NEXT: movdqa %xmm0, %xmm1 +; SSE3-NEXT: psrlw $1, %xmm1 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: psubb %xmm1, %xmm0 +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; SSE3-NEXT: movdqa %xmm0, %xmm2 +; SSE3-NEXT: pand %xmm1, %xmm2 +; SSE3-NEXT: psrlw $2, %xmm0 +; SSE3-NEXT: pand %xmm1, %xmm0 +; SSE3-NEXT: paddb %xmm2, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm1 +; SSE3-NEXT: psrlw $4, %xmm1 +; SSE3-NEXT: paddb %xmm0, %xmm1 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: pxor %xmm0, %xmm0 +; SSE3-NEXT: movdqa %xmm1, %xmm2 +; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE3-NEXT: psadbw %xmm0, %xmm2 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE3-NEXT: psadbw %xmm0, %xmm1 +; SSE3-NEXT: packuswb %xmm2, %xmm1 +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] +; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE3-NEXT: retq ; -; 
BITALG_NOVLX-LABEL: ugt_1_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq +; SSSE3-LABEL: ult_8_v4i32: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSSE3-NEXT: movdqa %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm1, %xmm2 +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSSE3-NEXT: movdqa %xmm3, %xmm4 +; SSSE3-NEXT: pshufb %xmm2, %xmm4 +; SSSE3-NEXT: psrlw $4, %xmm0 +; SSSE3-NEXT: pand %xmm1, %xmm0 +; SSSE3-NEXT: pshufb %xmm0, %xmm3 +; SSSE3-NEXT: paddb %xmm4, %xmm3 +; SSSE3-NEXT: pxor %xmm0, %xmm0 +; SSSE3-NEXT: movdqa %xmm3, %xmm1 +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: psadbw %xmm0, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] +; SSSE3-NEXT: psadbw %xmm0, %xmm3 +; SSSE3-NEXT: packuswb %xmm1, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 +; SSSE3-NEXT: retq ; -; BITALG-LABEL: ugt_1_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ult_2_v4i32(<4 x i32> %0) { -; SSE-LABEL: ult_2_v4i32: -; SSE: # %bb.0: -; SSE-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE-NEXT: paddd %xmm0, %xmm1 -; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE-NEXT: retq +; SSE41-LABEL: ult_8_v4i32: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pand %xmm1, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: pshufb %xmm2, %xmm4 +; SSE41-NEXT: psrlw $4, %xmm0 +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: pshufb %xmm0, %xmm3 +; SSE41-NEXT: paddb %xmm4, %xmm3 +; SSE41-NEXT: pxor %xmm0, %xmm0 +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero +; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; SSE41-NEXT: psadbw %xmm0, %xmm3 +; SSE41-NEXT: psadbw %xmm0, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] +; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_2_v4i32: +; AVX1-LABEL: ult_8_v4i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: 
vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8] +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_2_v4i32: +; AVX2-LABEL: ult_8_v4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 +; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_2_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_8_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -8258,31 +8204,42 @@ define <4 x i32> @ult_2_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_2_v4i32: +; BITALG_NOVLX-LABEL: ult_8_v4i32: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] +; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_2_v4i32: +; BITALG-LABEL: ult_8_v4i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 +; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 +; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_2_v4i32: +define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_8_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -8309,7 +8266,7 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_2_v4i32: +; SSE3-LABEL: ugt_8_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -8336,7 +8293,7 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_2_v4i32: +; SSSE3-LABEL: ugt_8_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -8359,7 +8316,7 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_2_v4i32: +; SSE41-LABEL: ugt_8_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -8380,7 +8337,7 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_2_v4i32: +; AVX1-LABEL: ugt_8_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8399,7 +8356,7 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_2_v4i32: +; AVX2-LABEL: ugt_8_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8415,20 +8372,20 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_2_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_8_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud 
{{.*}}(%rip){1to4}, %xmm0, %k1 @@ -8436,7 +8393,7 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_2_v4i32: +; BITALG_NOVLX-LABEL: ugt_8_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8446,12 +8403,12 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_2_v4i32: +; BITALG-LABEL: ugt_8_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8465,13 +8422,13 @@ define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_3_v4i32: +define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_9_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -8494,11 +8451,11 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_3_v4i32: +; SSE3-LABEL: ult_9_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -8521,11 +8478,11 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_3_v4i32: +; SSSE3-LABEL: ult_9_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -8544,11 +8501,11 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_3_v4i32: +; SSE41-LABEL: ult_9_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -8566,11 +8523,11 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_3_v4i32: +; AVX1-LABEL: ult_9_v4i32: ; 
AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8586,11 +8543,11 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_3_v4i32: +; AVX2-LABEL: ult_9_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8606,20 +8563,20 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_3_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_9_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -8627,7 +8584,7 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_3_v4i32: +; BITALG_NOVLX-LABEL: ult_9_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8637,12 +8594,12 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_3_v4i32: +; BITALG-LABEL: ult_9_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8656,13 +8613,13 @@ define <4 x i32> @ult_3_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_3_v4i32: +define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_9_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -8689,7 +8646,7 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_3_v4i32: +; SSE3-LABEL: ugt_9_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 
; SSE3-NEXT: psrlw $1, %xmm1 @@ -8716,7 +8673,7 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_3_v4i32: +; SSSE3-LABEL: ugt_9_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -8739,7 +8696,7 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_3_v4i32: +; SSE41-LABEL: ugt_9_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -8760,7 +8717,7 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_3_v4i32: +; AVX1-LABEL: ugt_9_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8779,7 +8736,7 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_3_v4i32: +; AVX2-LABEL: ugt_9_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8795,20 +8752,20 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_3_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_9_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -8816,7 +8773,7 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_3_v4i32: +; BITALG_NOVLX-LABEL: ugt_9_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8826,12 +8783,12 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_3_v4i32: +; BITALG-LABEL: ugt_9_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8845,13 +8802,13 @@ define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail 
call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_4_v4i32: +define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_10_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -8874,11 +8831,11 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_4_v4i32: +; SSE3-LABEL: ult_10_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -8901,11 +8858,11 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_4_v4i32: +; SSSE3-LABEL: ult_10_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -8924,11 +8881,11 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_4_v4i32: +; SSE41-LABEL: ult_10_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -8946,11 +8903,11 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_4_v4i32: +; AVX1-LABEL: ult_10_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8966,11 +8923,11 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_4_v4i32: +; AVX2-LABEL: ult_10_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8986,20 +8943,20 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_4_v4i32: 
+; AVX512VPOPCNTDQ-LABEL: ult_10_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -9007,7 +8964,7 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_4_v4i32: +; BITALG_NOVLX-LABEL: ult_10_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9017,12 +8974,12 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_4_v4i32: +; BITALG-LABEL: ult_10_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9036,13 +8993,13 @@ define <4 x i32> @ult_4_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_4_v4i32: +define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_10_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -9069,7 +9026,7 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_4_v4i32: +; SSE3-LABEL: ugt_10_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -9096,7 +9053,7 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_4_v4i32: +; SSSE3-LABEL: ugt_10_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -9119,7 +9076,7 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_4_v4i32: +; SSE41-LABEL: ugt_10_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -9140,7 +9097,7 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_4_v4i32: +; AVX1-LABEL: ugt_10_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9159,7 +9116,7 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; 
AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_4_v4i32: +; AVX2-LABEL: ugt_10_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9175,20 +9132,20 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_4_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_10_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -9196,7 +9153,7 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_4_v4i32: +; BITALG_NOVLX-LABEL: ugt_10_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9206,12 +9163,12 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_4_v4i32: +; BITALG-LABEL: ugt_10_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9225,13 +9182,13 @@ define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_5_v4i32: +define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_11_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -9254,11 +9211,11 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_5_v4i32: +; SSE3-LABEL: ult_11_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -9281,11 +9238,11 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_5_v4i32: +; SSSE3-LABEL: ult_11_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -9304,11 +9261,11 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_5_v4i32: +; SSE41-LABEL: ult_11_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -9326,11 +9283,11 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_5_v4i32: +; AVX1-LABEL: ult_11_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9346,11 +9303,11 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_5_v4i32: +; AVX2-LABEL: ult_11_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9366,20 +9323,20 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_5_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_11_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -9387,7 +9344,7 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_5_v4i32: +; BITALG_NOVLX-LABEL: ult_11_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9397,12 +9354,12 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; 
BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_5_v4i32: +; BITALG-LABEL: ult_11_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9416,13 +9373,13 @@ define <4 x i32> @ult_5_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_5_v4i32: +define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_11_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -9449,7 +9406,7 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_5_v4i32: +; SSE3-LABEL: ugt_11_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -9476,7 +9433,7 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_5_v4i32: +; SSSE3-LABEL: ugt_11_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -9499,7 +9456,7 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_5_v4i32: +; SSE41-LABEL: ugt_11_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -9520,7 +9477,7 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_5_v4i32: +; AVX1-LABEL: ugt_11_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9539,7 +9496,7 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_5_v4i32: +; AVX2-LABEL: ugt_11_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9555,20 +9512,20 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_5_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_11_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; 
AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -9576,7 +9533,7 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_5_v4i32: +; BITALG_NOVLX-LABEL: ugt_11_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9586,12 +9543,12 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_5_v4i32: +; BITALG-LABEL: ugt_11_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9605,13 +9562,13 @@ define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_6_v4i32: +define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_12_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -9634,11 +9591,11 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_6_v4i32: +; SSE3-LABEL: ult_12_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -9661,11 +9618,11 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_6_v4i32: +; SSSE3-LABEL: ult_12_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -9684,11 +9641,11 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_6_v4i32: +; SSE41-LABEL: ult_12_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -9706,11 +9663,11 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb 
%xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_6_v4i32: +; AVX1-LABEL: ult_12_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9726,11 +9683,11 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_6_v4i32: +; AVX2-LABEL: ult_12_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9746,20 +9703,20 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_6_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_12_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -9767,7 +9724,7 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_6_v4i32: +; BITALG_NOVLX-LABEL: ult_12_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9777,12 +9734,12 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_6_v4i32: +; BITALG-LABEL: ult_12_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9796,13 +9753,13 @@ define <4 x i32> @ult_6_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_6_v4i32: +define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_12_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; 
SSE2-NEXT: psrlw $1, %xmm1 @@ -9829,7 +9786,7 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_6_v4i32: +; SSE3-LABEL: ugt_12_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -9856,7 +9813,7 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_6_v4i32: +; SSSE3-LABEL: ugt_12_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -9879,7 +9836,7 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_6_v4i32: +; SSE41-LABEL: ugt_12_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -9900,7 +9857,7 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_6_v4i32: +; AVX1-LABEL: ugt_12_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9919,7 +9876,7 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_6_v4i32: +; AVX2-LABEL: ugt_12_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9935,20 +9892,20 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_6_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_12_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -9956,7 +9913,7 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_6_v4i32: +; BITALG_NOVLX-LABEL: ugt_12_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9966,12 +9923,12 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_6_v4i32: 
+; BITALG-LABEL: ugt_12_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9985,13 +9942,13 @@ define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_7_v4i32: +define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_13_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -10014,11 +9971,11 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_7_v4i32: +; SSE3-LABEL: ult_13_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -10041,11 +9998,11 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_7_v4i32: +; SSSE3-LABEL: ult_13_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -10064,11 +10021,11 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_7_v4i32: +; SSE41-LABEL: ult_13_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -10086,11 +10043,11 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_7_v4i32: +; AVX1-LABEL: ult_13_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10106,11 +10063,11 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_7_v4i32: +; AVX2-LABEL: ult_13_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10126,20 +10083,20 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; 
AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_7_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_13_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -10147,7 +10104,7 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_7_v4i32: +; BITALG_NOVLX-LABEL: ult_13_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10157,12 +10114,12 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_7_v4i32: +; BITALG-LABEL: ult_13_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10176,13 +10133,13 @@ define <4 x i32> @ult_7_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_7_v4i32: +define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_13_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -10209,7 +10166,7 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_7_v4i32: +; SSE3-LABEL: ugt_13_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -10236,7 +10193,7 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_7_v4i32: +; SSSE3-LABEL: ugt_13_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -10259,7 +10216,7 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_7_v4i32: +; SSE41-LABEL: ugt_13_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -10280,7 +10237,7 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: 
ugt_7_v4i32: +; AVX1-LABEL: ugt_13_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10299,7 +10256,7 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_7_v4i32: +; AVX2-LABEL: ugt_13_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10315,20 +10272,20 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_13_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -10336,7 +10293,7 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_7_v4i32: +; BITALG_NOVLX-LABEL: ugt_13_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10346,12 +10303,12 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_7_v4i32: +; BITALG-LABEL: ugt_13_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10365,13 +10322,13 @@ define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_8_v4i32: +define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_14_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -10394,11 +10351,11 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_8_v4i32: +; 
SSE3-LABEL: ult_14_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -10421,11 +10378,11 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_8_v4i32: +; SSSE3-LABEL: ult_14_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -10444,11 +10401,11 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_8_v4i32: +; SSE41-LABEL: ult_14_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -10466,11 +10423,11 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_8_v4i32: +; AVX1-LABEL: ult_14_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10486,11 +10443,11 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_8_v4i32: +; AVX2-LABEL: ult_14_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10506,20 +10463,20 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_14_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -10527,7 +10484,7 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, 
%xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_8_v4i32: +; BITALG_NOVLX-LABEL: ult_14_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10537,12 +10494,12 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_8_v4i32: +; BITALG-LABEL: ult_14_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10556,13 +10513,13 @@ define <4 x i32> @ult_8_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_8_v4i32: +define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_14_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -10589,7 +10546,7 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_8_v4i32: +; SSE3-LABEL: ugt_14_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -10616,7 +10573,7 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_8_v4i32: +; SSSE3-LABEL: ugt_14_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -10639,7 +10596,7 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_8_v4i32: +; SSE41-LABEL: ugt_14_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -10660,7 +10617,7 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_8_v4i32: +; AVX1-LABEL: ugt_14_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10679,7 +10636,7 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_8_v4i32: +; AVX2-LABEL: ugt_14_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10695,20 +10652,20 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_14_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -10716,7 +10673,7 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_8_v4i32: +; BITALG_NOVLX-LABEL: ugt_14_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10726,12 +10683,12 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_8_v4i32: +; BITALG-LABEL: ugt_14_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10745,13 +10702,13 @@ define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_9_v4i32: +define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_15_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -10774,11 +10731,11 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_9_v4i32: +; SSE3-LABEL: ult_15_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -10801,11 +10758,11 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_9_v4i32: +; SSSE3-LABEL: ult_15_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -10824,11 +10781,11 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_9_v4i32: 
+; SSE41-LABEL: ult_15_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -10846,11 +10803,11 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_9_v4i32: +; AVX1-LABEL: ult_15_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10866,11 +10823,11 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_9_v4i32: +; AVX2-LABEL: ult_15_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10886,20 +10843,20 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_15_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -10907,7 +10864,7 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_9_v4i32: +; BITALG_NOVLX-LABEL: ult_15_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10917,12 +10874,12 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_9_v4i32: +; BITALG-LABEL: ult_15_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10936,13 +10893,13 @@ define <4 x i32> @ult_9_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> 
@llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_9_v4i32: +define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_15_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -10969,7 +10926,7 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_9_v4i32: +; SSE3-LABEL: ugt_15_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -10996,7 +10953,7 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_9_v4i32: +; SSSE3-LABEL: ugt_15_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -11019,7 +10976,7 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_9_v4i32: +; SSE41-LABEL: ugt_15_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -11040,7 +10997,7 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_9_v4i32: +; AVX1-LABEL: ugt_15_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11059,7 +11016,7 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_9_v4i32: +; AVX2-LABEL: ugt_15_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11075,20 +11032,20 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_9_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_15_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -11096,7 +11053,7 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_9_v4i32: +; BITALG_NOVLX-LABEL: ugt_15_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -11106,12 +11063,12 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; 
BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_9_v4i32: +; BITALG-LABEL: ugt_15_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11125,13 +11082,13 @@ define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_10_v4i32: +define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_16_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -11154,11 +11111,11 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_10_v4i32: +; SSE3-LABEL: ult_16_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -11181,11 +11138,11 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_10_v4i32: +; SSSE3-LABEL: ult_16_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -11204,11 +11161,11 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_10_v4i32: +; SSE41-LABEL: ult_16_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -11226,11 +11183,11 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_10_v4i32: +; AVX1-LABEL: ult_16_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11246,11 +11203,11 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16] ; 
AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_10_v4i32: +; AVX2-LABEL: ult_16_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11266,20 +11223,20 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_10_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_16_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -11287,7 +11244,7 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_10_v4i32: +; BITALG_NOVLX-LABEL: ult_16_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -11297,12 +11254,12 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_10_v4i32: +; BITALG-LABEL: ult_16_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11316,13 +11273,13 @@ define <4 x i32> @ult_10_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_10_v4i32: +define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_16_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -11349,7 +11306,7 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_10_v4i32: +; SSE3-LABEL: ugt_16_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -11376,7 +11333,7 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_10_v4i32: +; SSSE3-LABEL: ugt_16_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -11399,7 +11356,7 @@ define <4 x i32> 
@ugt_10_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_10_v4i32: +; SSE41-LABEL: ugt_16_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -11420,7 +11377,7 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_10_v4i32: +; AVX1-LABEL: ugt_16_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11439,7 +11396,7 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_10_v4i32: +; AVX2-LABEL: ugt_16_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11455,20 +11412,20 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_10_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_16_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -11476,7 +11433,7 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_10_v4i32: +; BITALG_NOVLX-LABEL: ugt_16_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -11486,12 +11443,12 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_10_v4i32: +; BITALG-LABEL: ugt_16_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11505,13 +11462,13 @@ define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_11_v4i32: +define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_17_v4i32: ; SSE2: # 
%bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -11534,11 +11491,11 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_11_v4i32: +; SSE3-LABEL: ult_17_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -11561,11 +11518,11 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_11_v4i32: +; SSSE3-LABEL: ult_17_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -11584,11 +11541,11 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_11_v4i32: +; SSE41-LABEL: ult_17_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -11606,11 +11563,11 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_11_v4i32: +; AVX1-LABEL: ult_17_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11626,11 +11583,11 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_11_v4i32: +; AVX2-LABEL: ult_17_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11646,20 +11603,20 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_11_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_17_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] +; AVX512VPOPCNTDQ-NEXT: 
vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -11667,7 +11624,7 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_11_v4i32: +; BITALG_NOVLX-LABEL: ult_17_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -11677,12 +11634,12 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_11_v4i32: +; BITALG-LABEL: ult_17_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11696,13 +11653,13 @@ define <4 x i32> @ult_11_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_11_v4i32: +define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_17_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -11729,7 +11686,7 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_11_v4i32: +; SSE3-LABEL: ugt_17_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -11756,7 +11713,7 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_11_v4i32: +; SSSE3-LABEL: ugt_17_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -11779,7 +11736,7 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_11_v4i32: +; SSE41-LABEL: ugt_17_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -11800,7 +11757,7 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_11_v4i32: +; AVX1-LABEL: ugt_17_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11819,7 +11776,7 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_11_v4i32: +; AVX2-LABEL: ugt_17_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11835,20 
+11792,20 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_11_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_17_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -11856,7 +11813,7 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_11_v4i32: +; BITALG_NOVLX-LABEL: ugt_17_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -11866,12 +11823,12 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_11_v4i32: +; BITALG-LABEL: ugt_17_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11885,13 +11842,13 @@ define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_12_v4i32: +define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_18_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -11914,11 +11871,11 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_12_v4i32: +; SSE3-LABEL: ult_18_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -11941,11 +11898,11 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_12_v4i32: +; 
SSSE3-LABEL: ult_18_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -11964,11 +11921,11 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_12_v4i32: +; SSE41-LABEL: ult_18_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -11986,11 +11943,11 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_12_v4i32: +; AVX1-LABEL: ult_18_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12006,11 +11963,11 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18,18,18] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_12_v4i32: +; AVX2-LABEL: ult_18_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12026,20 +11983,20 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_12_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_18_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -12047,7 +12004,7 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_12_v4i32: +; BITALG_NOVLX-LABEL: ult_18_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -12057,12 +12014,12 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: 
vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_12_v4i32: +; BITALG-LABEL: ult_18_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12076,13 +12033,13 @@ define <4 x i32> @ult_12_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_12_v4i32: +define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_18_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -12109,7 +12066,7 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_12_v4i32: +; SSE3-LABEL: ugt_18_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -12136,7 +12093,7 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_12_v4i32: +; SSSE3-LABEL: ugt_18_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -12159,7 +12116,7 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_12_v4i32: +; SSE41-LABEL: ugt_18_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -12180,7 +12137,7 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_12_v4i32: +; AVX1-LABEL: ugt_18_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12199,7 +12156,7 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_12_v4i32: +; AVX2-LABEL: ugt_18_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12215,20 +12172,20 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_12_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_18_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; 
AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -12236,7 +12193,7 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_12_v4i32: +; BITALG_NOVLX-LABEL: ugt_18_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -12246,12 +12203,12 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_12_v4i32: +; BITALG-LABEL: ugt_18_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12265,13 +12222,13 @@ define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_13_v4i32: +define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_19_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -12294,11 +12251,11 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_13_v4i32: +; SSE3-LABEL: ult_19_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -12321,11 +12278,11 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_13_v4i32: +; SSSE3-LABEL: ult_19_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -12344,11 +12301,11 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_13_v4i32: +; SSE41-LABEL: ult_19_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -12366,11 +12323,11 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13] +; 
SSE41-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_13_v4i32: +; AVX1-LABEL: ult_19_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12386,11 +12343,11 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19,19,19] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_13_v4i32: +; AVX2-LABEL: ult_19_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12406,20 +12363,20 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_13_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_19_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -12427,7 +12384,7 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_13_v4i32: +; BITALG_NOVLX-LABEL: ult_19_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -12437,12 +12394,12 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_13_v4i32: +; BITALG-LABEL: ult_19_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12456,13 +12413,13 @@ define <4 x i32> @ult_13_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_13_v4i32: +define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_19_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 
@@ -12489,7 +12446,7 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_13_v4i32: +; SSE3-LABEL: ugt_19_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -12516,7 +12473,7 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_13_v4i32: +; SSSE3-LABEL: ugt_19_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -12539,7 +12496,7 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_13_v4i32: +; SSE41-LABEL: ugt_19_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -12560,7 +12517,7 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_13_v4i32: +; AVX1-LABEL: ugt_19_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12579,7 +12536,7 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_13_v4i32: +; AVX2-LABEL: ugt_19_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12595,20 +12552,20 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_13_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_19_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -12616,7 +12573,7 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_13_v4i32: +; BITALG_NOVLX-LABEL: ugt_19_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -12626,12 +12583,12 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; 
BITALG-LABEL: ugt_13_v4i32: +; BITALG-LABEL: ugt_19_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12645,13 +12602,13 @@ define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_14_v4i32: +define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_20_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -12674,11 +12631,11 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_14_v4i32: +; SSE3-LABEL: ult_20_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -12701,11 +12658,11 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_14_v4i32: +; SSSE3-LABEL: ult_20_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -12724,11 +12681,11 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_14_v4i32: +; SSE41-LABEL: ult_20_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -12746,11 +12703,11 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_14_v4i32: +; AVX1-LABEL: ult_20_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12766,11 +12723,11 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20,20,20] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_14_v4i32: +; AVX2-LABEL: ult_20_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12786,20 +12743,20 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; 
AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_14_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_20_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -12807,7 +12764,7 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_14_v4i32: +; BITALG_NOVLX-LABEL: ult_20_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -12817,12 +12774,12 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_14_v4i32: +; BITALG-LABEL: ult_20_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12836,13 +12793,13 @@ define <4 x i32> @ult_14_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_14_v4i32: +define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_20_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -12869,7 +12826,7 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_14_v4i32: +; SSE3-LABEL: ugt_20_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -12896,7 +12853,7 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_14_v4i32: +; SSSE3-LABEL: ugt_20_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -12919,7 +12876,7 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_14_v4i32: +; SSE41-LABEL: ugt_20_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -12940,7 +12897,7 @@ define <4 x i32> @ugt_14_v4i32(<4 x 
i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_14_v4i32: +; AVX1-LABEL: ugt_20_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12959,7 +12916,7 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_14_v4i32: +; AVX2-LABEL: ugt_20_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12975,20 +12932,20 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_14_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_20_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -12996,7 +12953,7 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_14_v4i32: +; BITALG_NOVLX-LABEL: ugt_20_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -13006,12 +12963,12 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_14_v4i32: +; BITALG-LABEL: ugt_20_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13025,13 +12982,13 @@ define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_15_v4i32: +define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_21_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -13054,11 +13011,11 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] +; SSE2-NEXT: 
movdqa {{.*#+}} xmm0 = [21,21,21,21] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_15_v4i32: +; SSE3-LABEL: ult_21_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -13081,11 +13038,11 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_15_v4i32: +; SSSE3-LABEL: ult_21_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -13104,11 +13061,11 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_15_v4i32: +; SSE41-LABEL: ult_21_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -13126,11 +13083,11 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_15_v4i32: +; AVX1-LABEL: ult_21_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13146,11 +13103,11 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21,21,21] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_15_v4i32: +; AVX2-LABEL: ult_21_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13166,20 +13123,20 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_15_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_21_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; 
AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -13187,7 +13144,7 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_15_v4i32: +; BITALG_NOVLX-LABEL: ult_21_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -13197,12 +13154,12 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_15_v4i32: +; BITALG-LABEL: ult_21_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13216,13 +13173,13 @@ define <4 x i32> @ult_15_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_15_v4i32: +define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_21_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -13249,7 +13206,7 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_15_v4i32: +; SSE3-LABEL: ugt_21_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -13276,7 +13233,7 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_15_v4i32: +; SSSE3-LABEL: ugt_21_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -13299,7 +13256,7 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_15_v4i32: +; SSE41-LABEL: ugt_21_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -13320,7 +13277,7 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_15_v4i32: +; AVX1-LABEL: ugt_21_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13339,7 +13296,7 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_15_v4i32: +; AVX2-LABEL: ugt_21_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13355,20 +13312,20 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] ; 
AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_15_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_21_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -13376,7 +13333,7 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_15_v4i32: +; BITALG_NOVLX-LABEL: ugt_21_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -13386,12 +13343,12 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_15_v4i32: +; BITALG-LABEL: ugt_21_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13405,13 +13362,13 @@ define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_16_v4i32: +define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_22_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -13434,11 +13391,11 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_16_v4i32: +; SSE3-LABEL: ult_22_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -13461,11 +13418,11 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_16_v4i32: +; SSSE3-LABEL: ult_22_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -13484,11 +13441,11 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: 
psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_16_v4i32: +; SSE41-LABEL: ult_22_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -13506,11 +13463,11 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_16_v4i32: +; AVX1-LABEL: ult_22_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13526,11 +13483,11 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22,22,22] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_16_v4i32: +; AVX2-LABEL: ult_22_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13546,20 +13503,20 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_16_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_22_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -13567,7 +13524,7 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_16_v4i32: +; BITALG_NOVLX-LABEL: ult_22_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -13577,12 +13534,12 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_16_v4i32: +; BITALG-LABEL: 
ult_22_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13596,13 +13553,13 @@ define <4 x i32> @ult_16_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_16_v4i32: +define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_22_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -13629,7 +13586,7 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_16_v4i32: +; SSE3-LABEL: ugt_22_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -13656,7 +13613,7 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_16_v4i32: +; SSSE3-LABEL: ugt_22_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -13679,7 +13636,7 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_16_v4i32: +; SSE41-LABEL: ugt_22_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -13700,7 +13657,7 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_16_v4i32: +; AVX1-LABEL: ugt_22_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13719,7 +13676,7 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_16_v4i32: +; AVX2-LABEL: ugt_22_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13735,20 +13692,20 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_16_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_22_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -13756,7 +13713,7 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_16_v4i32: +; 
BITALG_NOVLX-LABEL: ugt_22_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -13766,12 +13723,12 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_16_v4i32: +; BITALG-LABEL: ugt_22_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13785,13 +13742,13 @@ define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_17_v4i32: +define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_23_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -13814,11 +13771,11 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_17_v4i32: +; SSE3-LABEL: ult_23_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -13841,11 +13798,11 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_17_v4i32: +; SSSE3-LABEL: ult_23_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -13864,11 +13821,11 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_17_v4i32: +; SSE41-LABEL: ult_23_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -13886,11 +13843,11 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_17_v4i32: +; AVX1-LABEL: ult_23_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13906,11 
+13863,11 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23,23,23] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_17_v4i32: +; AVX2-LABEL: ult_23_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13926,20 +13883,20 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_17_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_23_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -13947,7 +13904,7 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_17_v4i32: +; BITALG_NOVLX-LABEL: ult_23_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -13957,12 +13914,12 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_17_v4i32: +; BITALG-LABEL: ult_23_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13976,13 +13933,13 @@ define <4 x i32> @ult_17_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_17_v4i32: +define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_23_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -14009,7 +13966,7 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_17_v4i32: +; SSE3-LABEL: ugt_23_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -14036,7 +13993,7 @@ define <4 x i32> @ugt_17_v4i32(<4 x 
i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_17_v4i32: +; SSSE3-LABEL: ugt_23_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -14059,7 +14016,7 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_17_v4i32: +; SSE41-LABEL: ugt_23_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -14080,7 +14037,7 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_17_v4i32: +; AVX1-LABEL: ugt_23_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14099,7 +14056,7 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_17_v4i32: +; AVX2-LABEL: ugt_23_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14115,20 +14072,20 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_17_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_23_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -14136,7 +14093,7 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_17_v4i32: +; BITALG_NOVLX-LABEL: ugt_23_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -14146,12 +14103,12 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_17_v4i32: +; BITALG-LABEL: ugt_23_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14165,13 +14122,13 @@ define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> 
@llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_18_v4i32: +define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_24_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -14194,11 +14151,11 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_18_v4i32: +; SSE3-LABEL: ult_24_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -14221,11 +14178,11 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_18_v4i32: +; SSSE3-LABEL: ult_24_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -14244,11 +14201,11 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_18_v4i32: +; SSE41-LABEL: ult_24_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -14266,11 +14223,11 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_18_v4i32: +; AVX1-LABEL: ult_24_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14286,11 +14243,11 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18,18,18] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24,24,24] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_18_v4i32: +; AVX2-LABEL: ult_24_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14306,20 +14263,20 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; 
AVX512VPOPCNTDQ-LABEL: ult_18_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_24_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -14327,7 +14284,7 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_18_v4i32: +; BITALG_NOVLX-LABEL: ult_24_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -14337,12 +14294,12 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_18_v4i32: +; BITALG-LABEL: ult_24_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14356,13 +14313,13 @@ define <4 x i32> @ult_18_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_18_v4i32: +define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_24_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -14389,7 +14346,7 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_18_v4i32: +; SSE3-LABEL: ugt_24_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -14416,7 +14373,7 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_18_v4i32: +; SSSE3-LABEL: ugt_24_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -14439,7 +14396,7 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_18_v4i32: +; SSE41-LABEL: ugt_24_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -14460,7 +14417,7 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_18_v4i32: +; AVX1-LABEL: ugt_24_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14479,7 +14436,7 @@ define <4 x i32> 
@ugt_18_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_18_v4i32: +; AVX2-LABEL: ugt_24_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14495,20 +14452,20 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_18_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_24_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -14516,7 +14473,7 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_18_v4i32: +; BITALG_NOVLX-LABEL: ugt_24_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -14526,12 +14483,12 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_18_v4i32: +; BITALG-LABEL: ugt_24_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14545,13 +14502,13 @@ define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_19_v4i32: +define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_25_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -14574,11 +14531,11 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_19_v4i32: +; SSE3-LABEL: ult_25_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -14601,11 +14558,11 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; SSE3-NEXT: 
punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_19_v4i32: +; SSSE3-LABEL: ult_25_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -14624,11 +14581,11 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_19_v4i32: +; SSE41-LABEL: ult_25_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -14646,11 +14603,11 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_19_v4i32: +; AVX1-LABEL: ult_25_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14666,11 +14623,11 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19,19,19] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25,25,25] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_19_v4i32: +; AVX2-LABEL: ult_25_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14686,20 +14643,20 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_19_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_25_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -14707,7 +14664,7 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_19_v4i32: +; BITALG_NOVLX-LABEL: ult_25_v4i32: ; BITALG_NOVLX: # %bb.0: ; 
BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -14717,12 +14674,12 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_19_v4i32: +; BITALG-LABEL: ult_25_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14736,13 +14693,13 @@ define <4 x i32> @ult_19_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_19_v4i32: +define <4 x i32> @ugt_25_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_25_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -14769,7 +14726,7 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_19_v4i32: +; SSE3-LABEL: ugt_25_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -14796,7 +14753,7 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_19_v4i32: +; SSSE3-LABEL: ugt_25_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -14819,7 +14776,7 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_19_v4i32: +; SSE41-LABEL: ugt_25_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -14840,7 +14797,7 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_19_v4i32: +; AVX1-LABEL: ugt_25_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14859,7 +14816,7 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_19_v4i32: +; AVX2-LABEL: ugt_25_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14875,20 +14832,20 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_19_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_25_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = 
[19,19,19,19] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -14896,7 +14853,7 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_19_v4i32: +; BITALG_NOVLX-LABEL: ugt_25_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -14906,12 +14863,12 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_19_v4i32: +; BITALG-LABEL: ugt_25_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14925,13 +14882,13 @@ define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_20_v4i32: +define <4 x i32> @ult_26_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_26_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -14954,11 +14911,11 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_20_v4i32: +; SSE3-LABEL: ult_26_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -14981,11 +14938,11 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_20_v4i32: +; SSSE3-LABEL: ult_26_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -15004,11 +14961,11 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_20_v4i32: +; SSE41-LABEL: ult_26_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -15026,11 +14983,11 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_20_v4i32: +; AVX1-LABEL: ult_26_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15046,11 +15003,11 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20,20,20] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26,26,26] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_20_v4i32: +; AVX2-LABEL: ult_26_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15066,20 +15023,20 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_20_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_26_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -15087,7 +15044,7 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_20_v4i32: +; BITALG_NOVLX-LABEL: ult_26_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -15097,12 +15054,12 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_20_v4i32: +; BITALG-LABEL: ult_26_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15116,13 +15073,13 @@ define <4 x i32> @ult_20_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + 
%3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_20_v4i32: +define <4 x i32> @ugt_26_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_26_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -15149,7 +15106,7 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_20_v4i32: +; SSE3-LABEL: ugt_26_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -15176,7 +15133,7 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_20_v4i32: +; SSSE3-LABEL: ugt_26_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -15199,7 +15156,7 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_20_v4i32: +; SSE41-LABEL: ugt_26_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -15220,7 +15177,7 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_20_v4i32: +; AVX1-LABEL: ugt_26_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15239,7 +15196,7 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_20_v4i32: +; AVX2-LABEL: ugt_26_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15255,20 +15212,20 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_20_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_26_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -15276,7 +15233,7 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_20_v4i32: +; BITALG_NOVLX-LABEL: ugt_26_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -15286,12 +15243,12 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, 
%xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_20_v4i32: +; BITALG-LABEL: ugt_26_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15305,13 +15262,13 @@ define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_21_v4i32: +define <4 x i32> @ult_27_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_27_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -15334,11 +15291,11 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_21_v4i32: +; SSE3-LABEL: ult_27_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -15361,11 +15318,11 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_21_v4i32: +; SSSE3-LABEL: ult_27_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -15384,11 +15341,11 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_21_v4i32: +; SSE41-LABEL: ult_27_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -15406,11 +15363,11 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_21_v4i32: +; AVX1-LABEL: ult_27_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15426,11 +15383,11 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21,21,21] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27,27,27] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, 
%xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_21_v4i32: +; AVX2-LABEL: ult_27_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15446,20 +15403,20 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_21_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_27_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -15467,7 +15424,7 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_21_v4i32: +; BITALG_NOVLX-LABEL: ult_27_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -15477,12 +15434,12 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_21_v4i32: +; BITALG-LABEL: ult_27_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15496,13 +15453,13 @@ define <4 x i32> @ult_21_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_21_v4i32: +define <4 x i32> @ugt_27_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_27_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -15529,7 +15486,7 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_21_v4i32: +; SSE3-LABEL: ugt_27_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -15556,7 +15513,7 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_21_v4i32: +; SSSE3-LABEL: ugt_27_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -15579,7 +15536,7 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: 
movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_21_v4i32: +; SSE41-LABEL: ugt_27_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -15600,7 +15557,7 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_21_v4i32: +; AVX1-LABEL: ugt_27_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15619,7 +15576,7 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_21_v4i32: +; AVX2-LABEL: ugt_27_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15635,20 +15592,20 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_21_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_27_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -15656,7 +15613,7 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_21_v4i32: +; BITALG_NOVLX-LABEL: ugt_27_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -15666,12 +15623,12 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_21_v4i32: +; BITALG-LABEL: ugt_27_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15685,13 +15642,13 @@ define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_22_v4i32: +define <4 x i32> @ult_28_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_28_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; 
SSE2-NEXT: psrlw $1, %xmm1 @@ -15714,11 +15671,11 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_22_v4i32: +; SSE3-LABEL: ult_28_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -15741,11 +15698,11 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_22_v4i32: +; SSSE3-LABEL: ult_28_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -15764,11 +15721,11 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_22_v4i32: +; SSE41-LABEL: ult_28_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -15786,11 +15743,11 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_22_v4i32: +; AVX1-LABEL: ult_28_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15806,11 +15763,11 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22,22,22] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28,28,28] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_22_v4i32: +; AVX2-LABEL: ult_28_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15826,20 +15783,20 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_22_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_28_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] ; 
AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -15847,7 +15804,7 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_22_v4i32: +; BITALG_NOVLX-LABEL: ult_28_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -15857,12 +15814,12 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_22_v4i32: +; BITALG-LABEL: ult_28_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15876,13 +15833,13 @@ define <4 x i32> @ult_22_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_22_v4i32: +define <4 x i32> @ugt_28_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_28_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -15909,7 +15866,7 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_22_v4i32: +; SSE3-LABEL: ugt_28_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -15936,7 +15893,7 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_22_v4i32: +; SSSE3-LABEL: ugt_28_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -15959,7 +15916,7 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_22_v4i32: +; SSE41-LABEL: ugt_28_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -15980,7 +15937,7 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_22_v4i32: +; AVX1-LABEL: ugt_28_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15999,7 +15956,7 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_22_v4i32: +; AVX2-LABEL: ugt_28_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16015,20 +15972,20 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> 
%0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_22_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_28_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -16036,7 +15993,7 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_22_v4i32: +; BITALG_NOVLX-LABEL: ugt_28_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -16046,12 +16003,12 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_22_v4i32: +; BITALG-LABEL: ugt_28_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16065,13 +16022,13 @@ define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_23_v4i32: +define <4 x i32> @ult_29_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_29_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -16094,11 +16051,11 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_23_v4i32: +; SSE3-LABEL: ult_29_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -16121,11 +16078,11 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_23_v4i32: +; SSSE3-LABEL: ult_29_v4i32: ; SSSE3: # %bb.0: ; 
SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -16144,11 +16101,11 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_23_v4i32: +; SSE41-LABEL: ult_29_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -16166,11 +16123,11 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_23_v4i32: +; AVX1-LABEL: ult_29_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16186,11 +16143,11 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23,23,23] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29,29,29] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_23_v4i32: +; AVX2-LABEL: ult_29_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16206,20 +16163,20 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_23_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_29_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -16227,7 +16184,7 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_23_v4i32: +; BITALG_NOVLX-LABEL: ult_29_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -16237,12 +16194,12 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; 
BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_23_v4i32: +; BITALG-LABEL: ult_29_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16256,13 +16213,13 @@ define <4 x i32> @ult_23_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_23_v4i32: +define <4 x i32> @ugt_29_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_29_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -16289,7 +16246,7 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_23_v4i32: +; SSE3-LABEL: ugt_29_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -16316,7 +16273,7 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_23_v4i32: +; SSSE3-LABEL: ugt_29_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -16339,7 +16296,7 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_23_v4i32: +; SSE41-LABEL: ugt_29_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -16360,7 +16317,7 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_23_v4i32: +; AVX1-LABEL: ugt_29_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16379,7 +16336,7 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_23_v4i32: +; AVX2-LABEL: ugt_29_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16395,20 +16352,20 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_23_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_29_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, 
%xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -16416,7 +16373,7 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_23_v4i32: +; BITALG_NOVLX-LABEL: ugt_29_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -16426,12 +16383,12 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_23_v4i32: +; BITALG-LABEL: ugt_29_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16445,13 +16402,13 @@ define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_24_v4i32: +define <4 x i32> @ult_30_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_30_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -16474,11 +16431,11 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_24_v4i32: +; SSE3-LABEL: ult_30_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -16501,11 +16458,11 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_24_v4i32: +; SSSE3-LABEL: ult_30_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -16524,11 +16481,11 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_24_v4i32: +; SSE41-LABEL: ult_30_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -16546,11 +16503,11 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = 
[30,30,30,30] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_24_v4i32: +; AVX1-LABEL: ult_30_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16566,11 +16523,11 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24,24,24] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30,30,30] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_24_v4i32: +; AVX2-LABEL: ult_30_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16586,20 +16543,20 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_24_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_30_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -16607,7 +16564,7 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_24_v4i32: +; BITALG_NOVLX-LABEL: ult_30_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -16617,12 +16574,12 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_24_v4i32: +; BITALG-LABEL: ult_30_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16636,13 +16593,13 @@ define <4 x i32> @ult_24_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_24_v4i32: +define <4 x i32> @ugt_30_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ugt_30_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -16669,7 +16626,7 @@ define <4 x 
i32> @ugt_24_v4i32(<4 x i32> %0) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_24_v4i32: +; SSE3-LABEL: ugt_30_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -16696,7 +16653,7 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { ; SSE3-NEXT: movdqa %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_24_v4i32: +; SSSE3-LABEL: ugt_30_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm3 @@ -16719,7 +16676,7 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_24_v4i32: +; SSE41-LABEL: ugt_30_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -16740,7 +16697,7 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { ; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_24_v4i32: +; AVX1-LABEL: ugt_30_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16759,7 +16716,7 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_24_v4i32: +; AVX2-LABEL: ugt_30_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16775,20 +16732,20 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_24_v4i32: +; AVX512VPOPCNTDQ-LABEL: ugt_30_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -16796,7 +16753,7 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_24_v4i32: +; BITALG_NOVLX-LABEL: ugt_30_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -16806,12 +16763,12 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_24_v4i32: +; BITALG-LABEL: 
ugt_30_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16825,13 +16782,13 @@ define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ugt <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_25_v4i32: +define <4 x i32> @ult_31_v4i32(<4 x i32> %0) { +; SSE2-LABEL: ult_31_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -16854,11 +16811,11 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: psadbw %xmm0, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_25_v4i32: +; SSE3-LABEL: ult_31_v4i32: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -16881,11 +16838,11 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: psadbw %xmm0, %xmm1 ; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] +; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_25_v4i32: +; SSSE3-LABEL: ult_31_v4i32: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -16904,11 +16861,11 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSSE3-NEXT: psadbw %xmm0, %xmm3 ; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_25_v4i32: +; SSE41-LABEL: ult_31_v4i32: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -16926,11 +16883,11 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; SSE41-NEXT: psadbw %xmm0, %xmm3 ; SSE41-NEXT: psadbw %xmm0, %xmm1 ; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25] +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_25_v4i32: +; AVX1-LABEL: ult_31_v4i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16946,11 +16903,11 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25,25,25] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31,31,31] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_25_v4i32: +; AVX2-LABEL: ult_31_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16966,20 +16923,20 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_25_v4i32: +; AVX512VPOPCNTDQ-LABEL: ult_31_v4i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i32: +; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 @@ -16987,7 +16944,7 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_25_v4i32: +; BITALG_NOVLX-LABEL: ult_31_v4i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -16997,12 +16954,12 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_25_v4i32: +; BITALG-LABEL: ult_31_v4i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17016,4136 +16973,84 @@ define <4 x i32> @ult_25_v4i32(<4 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ugt_25_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_25_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_25_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_25_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb %xmm3, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: paddb %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_25_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE41-NEXT: psadbw %xmm1, %xmm3 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_25_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_25_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_25_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_25_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_25_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, + %3 = icmp ult <4 x i32> %2, %4 = sext <4 x i1> %3 to <4 x i32> ret <4 x i32> %4 } -define <4 x i32> @ult_26_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_26_v4i32: +define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_1_v2i64: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand 
{{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: paddq %xmm1, %xmm2 +; SSE2-NEXT: pand %xmm0, %xmm2 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_26_v4i32: +; SSE3-LABEL: ugt_1_v2i64: ; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; SSE3-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] -; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE3-NEXT: paddq %xmm1, %xmm2 +; SSE3-NEXT: pand %xmm0, %xmm2 +; SSE3-NEXT: pxor %xmm3, %xmm3 +; SSE3-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] +; SSE3-NEXT: pand %xmm3, %xmm0 +; SSE3-NEXT: pxor %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_26_v4i32: +; SSSE3-LABEL: ugt_1_v2i64: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 ; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_26_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: 
punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26] -; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_26_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26,26,26] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_26_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_26_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_26_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_26_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: 
vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ugt_26_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_26_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_26_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_26_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb %xmm3, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: paddb %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: 
ugt_26_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE41-NEXT: psadbw %xmm1, %xmm3 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_26_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_26_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_26_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_26_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: 
vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_26_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ult_27_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_27_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_27_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] -; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_27_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; 
SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_27_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27] -; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_27_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27,27,27] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_27_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_27_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i32: -; 
AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_27_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_27_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ugt_27_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_27_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_27_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_27_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb %xmm3, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: paddb %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_27_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE41-NEXT: psadbw %xmm1, %xmm3 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_27_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_27_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 
= [27,27,27,27] -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_27_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_27_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_27_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ult_28_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_28_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_28_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: 
movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] -; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_28_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_28_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28] -; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_28_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28,28,28] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_28_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: 
vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_28_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_28_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_28_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ugt_28_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_28_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} 
xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_28_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_28_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb %xmm3, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: paddb %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_28_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE41-NEXT: psadbw %xmm1, %xmm3 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_28_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = 
xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_28_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_28_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_28_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_28_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ult_29_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_29_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, 
%xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_29_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] -; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_29_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_29_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29] -; SSE41-NEXT: pcmpgtd %xmm1, 
%xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_29_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29,29,29] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_29_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_29_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_29_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_29_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, 
%xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ugt_29_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_29_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_29_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_29_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb %xmm3, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: paddb %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_29_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE41-NEXT: psadbw %xmm1, %xmm3 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_29_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_29_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_29_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_29_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: 
vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_29_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ult_30_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_30_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_30_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] -; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_30_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: 
psadbw %xmm0, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_30_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30] -; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_30_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30,30,30] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_30_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_30_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 
%xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_30_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_30_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ugt_30_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_30_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_30_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_30_v4i32: -; SSSE3: # %bb.0: -; 
SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb %xmm3, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: paddb %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_30_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE41-NEXT: psadbw %xmm1, %xmm3 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_30_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_30_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_30_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd 
%zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_30_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_30_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ult_31_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_31_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_31_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; 
SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] -; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_31_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_31_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31] -; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_31_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31,31,31] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_31_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, 
%xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_31_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_31_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_31_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ugt_31_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_31_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd {{.*}}(%rip), 
%xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_31_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_31_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb %xmm3, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: paddb %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_31_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE41-NEXT: psadbw %xmm1, %xmm3 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_31_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd 
{{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_31_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_31_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_31_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_31_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_31_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ult_32_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_32_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, 
%xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_32_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32] -; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_32_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32] -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_32_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32] -; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_32_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32,32,32] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_32_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32,32,32,32] -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_32_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32,32,32,32] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_32_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_32_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32,32,32,32] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_32_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call 
<4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ugt_32_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ugt_32_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_32_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_32_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pshufb %xmm3, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: paddb %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: packuswb %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_32_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm1 
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE41-NEXT: psadbw %xmm1, %xmm3 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_32_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_32_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32,32,32,32] -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_32_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32,32,32,32] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_32_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_32_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32,32,32,32] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_32_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ugt <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <4 x i32> @ult_33_v4i32(<4 x i32> %0) { -; SSE2-LABEL: ult_33_v4i32: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psadbw %xmm0, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: packuswb %xmm2, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33] -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_33_v4i32: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE3-NEXT: psadbw %xmm0, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: packuswb %xmm2, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33] -; SSE3-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_33_v4i32: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: psadbw %xmm0, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: packuswb %xmm1, %xmm3 -; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33] -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: retq 
-; -; SSE41-LABEL: ult_33_v4i32: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero -; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33] -; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_33_v4i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33,33,33] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_33_v4i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [33,33,33,33] -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_33_v4i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [33,33,33,33] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_33_v4i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_33_v4i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [33,33,33,33] -; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_33_v4i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2 -; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0) - %3 = icmp ult <4 x i32> %2, - %4 = sext <4 x i1> %3 to <4 x i32> - ret <4 x i32> %4 -} - -define <2 x i64> @ult_0_v2i64(<2 x i64> %0) { -; SSE-LABEL: ult_0_v2i64: -; SSE: # %bb.0: -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: retq -; -; AVX-LABEL: ult_0_v2i64: -; AVX: # %bb.0: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_0_v2i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_0_v2i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, - %4 = sext <2 x i1> %3 to <2 x i64> - ret <2 x i64> %4 -} - -define <2 x i64> @ugt_0_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_0_v2i64: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_0_v2i64: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2] -; SSE3-NEXT: pand %xmm0, %xmm1 -; 
SSE3-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE3-NEXT: pxor %xmm0, %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_0_v2i64: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: pcmpeqd %xmm0, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,0,3,2] -; SSSE3-NEXT: pand %xmm3, %xmm1 -; SSSE3-NEXT: pcmpeqd %xmm0, %xmm0 -; SSSE3-NEXT: pxor %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_0_v2i64: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: psadbw %xmm0, %xmm3 -; SSE41-NEXT: pcmpeqq %xmm0, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_0_v2i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_0_v2i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v2i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_0_v2i64: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_0_v2i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_0_v2i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, - %4 = sext <2 x i1> %3 to <2 x i64> - ret <2 x i64> %4 -} - -define <2 x i64> @ult_1_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_1_v2i64: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_1_v2i64: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: psadbw %xmm0, %xmm1 -; SSE3-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2] -; SSE3-NEXT: pand %xmm0, %xmm1 -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_1_v2i64: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: psadbw %xmm0, %xmm3 -; SSSE3-NEXT: pcmpeqd %xmm0, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] -; SSSE3-NEXT: pand %xmm3, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_1_v2i64: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm2 
= [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm1, %xmm4 -; SSE41-NEXT: pshufb %xmm3, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm2, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm1 -; SSE41-NEXT: paddb %xmm4, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_1_v2i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_1_v2i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_1_v2i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_1_v2i64: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_1_v2i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_1_v2i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, - %4 = sext <2 x i1> %3 to <2 x i64> - ret <2 x i64> %4 -} - -define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_1_v2i64: -; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: paddq %xmm1, %xmm2 -; SSE2-NEXT: pand %xmm0, %xmm2 -; SSE2-NEXT: pxor %xmm3, %xmm3 -; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] -; SSE2-NEXT: pand %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; 
SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_1_v2i64: -; SSE3: # %bb.0: -; SSE3-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: paddq %xmm1, %xmm2 -; SSE3-NEXT: pand %xmm0, %xmm2 -; SSE3-NEXT: pxor %xmm3, %xmm3 -; SSE3-NEXT: pcmpeqd %xmm2, %xmm3 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] -; SSE3-NEXT: pand %xmm3, %xmm0 -; SSE3-NEXT: pxor %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_1_v2i64: -; SSSE3: # %bb.0: -; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: paddq %xmm1, %xmm2 -; SSSE3-NEXT: pand %xmm0, %xmm2 -; SSSE3-NEXT: pxor %xmm3, %xmm3 -; SSSE3-NEXT: pcmpeqd %xmm2, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] -; SSSE3-NEXT: pand %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_1_v2i64: -; SSE41: # %bb.0: -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: paddq %xmm2, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 -; SSE41-NEXT: pxor %xmm2, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_1_v2i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_1_v2i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_1_v2i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_1_v2i64: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_1_v2i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_1_v2i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, - %4 = sext <2 x i1> %3 to <2 x i64> - ret <2 x i64> %4 -} - -define <2 x i64> @ult_2_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_2_v2i64: -; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: paddq 
%xmm0, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] -; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_2_v2i64: -; SSE3: # %bb.0: -; SSE3-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE3-NEXT: paddq %xmm0, %xmm1 -; SSE3-NEXT: pand %xmm0, %xmm1 -; SSE3-NEXT: pxor %xmm2, %xmm2 -; SSE3-NEXT: pcmpeqd %xmm1, %xmm2 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] -; SSE3-NEXT: pand %xmm2, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_2_v2i64: -; SSSE3: # %bb.0: -; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 -; SSSE3-NEXT: paddq %xmm0, %xmm1 -; SSSE3-NEXT: pand %xmm0, %xmm1 -; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: pcmpeqd %xmm1, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] -; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_2_v2i64: -; SSE41: # %bb.0: -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: paddq %xmm0, %xmm1 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_2_v2i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_2_v2i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_2_v2i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_2_v2i64: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_2_v2i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_2_v2i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1 -; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: retq - %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, - %4 = sext <2 x i1> %3 to <2 x i64> - ret <2 x i64> %4 -} - -define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_2_v2i64: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: 
pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: psadbw %xmm1, %xmm0 -; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm3, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_2_v2i64: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: psadbw %xmm1, %xmm0 -; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE3-NEXT: pand %xmm3, %xmm1 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_2_v2i64: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: psadbw %xmm3, %xmm0 -; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm3, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSSE3-NEXT: por %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_2_v2i64: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: psadbw %xmm3, %xmm0 -; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE41-NEXT: 
pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pand %xmm3, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ugt_2_v2i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_2_v2i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_2_v2i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_2_v2i64: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_2_v2i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_2_v2i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, - %4 = sext <2 x i1> %3 to <2 x i64> - ret <2 x i64> %4 -} - -define <2 x i64> @ult_3_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_3_v2i64: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), 
%xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: psadbw %xmm1, %xmm0 -; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm3, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ult_3_v2i64: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: psadbw %xmm1, %xmm0 -; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE3-NEXT: pand %xmm3, %xmm1 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ult_3_v2i64: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: psadbw %xmm3, %xmm0 -; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm3, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSSE3-NEXT: por %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ult_3_v2i64: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: psadbw %xmm3, %xmm0 -; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] -; SSE41-NEXT: movdqa %xmm1, %xmm2 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pand %xmm3, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE41-NEXT: por %xmm1, 
%xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: ult_3_v2i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_3_v2i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] -; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_3_v2i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512VPOPCNTDQ-NEXT: vzeroupper -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_3_v2i64: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_3_v2i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] -; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_3_v2i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, - %4 = sext <2 x i1> %3 to <2 x i64> - ret <2 x i64> %4 -} - -define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_3_v2i64: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: 
pxor %xmm0, %xmm0 -; SSE2-NEXT: psadbw %xmm1, %xmm0 -; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm3, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: ugt_3_v2i64: -; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: psadbw %xmm1, %xmm0 -; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE3-NEXT: pand %xmm3, %xmm1 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: ugt_3_v2i64: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: psadbw %xmm3, %xmm0 -; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm3, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSSE3-NEXT: por %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: ugt_3_v2i64: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 +; SSSE3-NEXT: paddq %xmm1, %xmm2 +; SSSE3-NEXT: pand %xmm0, %xmm2 +; SSSE3-NEXT: pxor %xmm3, %xmm3 +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,0,3,2] +; SSSE3-NEXT: pand %xmm3, %xmm0 +; SSSE3-NEXT: pxor %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: ugt_1_v2i64: +; SSE41: # %bb.0: +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: paddq %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm0, %xmm1 ; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: 
psadbw %xmm3, %xmm0 -; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pand %xmm3, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: pxor %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_3_v2i64: +; AVX1-LABEL: ugt_1_v2i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_3_v2i64: +; AVX2-LABEL: ugt_1_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_3_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_1_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -21153,7 +17058,7 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_3_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_1_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -21161,186 +17066,104 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_3_v2i64: +; BITALG_NOVLX-LABEL: ugt_1_v2i64: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 +; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vpcmpgtq 
{{.*}}(%rip), %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_3_v2i64: +; BITALG-LABEL: ugt_1_v2i64: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1 +; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_4_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_4_v2i64: +define <2 x i64> @ult_2_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_2_v2i64: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pand %xmm1, %xmm2 -; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: psadbw %xmm1, %xmm0 -; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm3, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: paddq %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] +; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_4_v2i64: +; SSE3-LABEL: ult_2_v2i64: ; SSE3: # %bb.0: -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] -; SSE3-NEXT: movdqa %xmm0, %xmm2 -; SSE3-NEXT: pand %xmm1, %xmm2 -; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm2, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm0, %xmm1 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE3-NEXT: pxor %xmm0, %xmm0 -; SSE3-NEXT: psadbw %xmm1, %xmm0 -; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] -; SSE3-NEXT: movdqa %xmm1, %xmm2 -; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE3-NEXT: pand %xmm3, %xmm1 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE3-NEXT: por %xmm1, %xmm0 +; 
SSE3-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE3-NEXT: paddq %xmm0, %xmm1 +; SSE3-NEXT: pand %xmm0, %xmm1 +; SSE3-NEXT: pxor %xmm2, %xmm2 +; SSE3-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] +; SSE3-NEXT: pand %xmm2, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_4_v2i64: +; SSSE3-LABEL: ult_2_v2i64: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pand %xmm1, %xmm2 -; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pshufb %xmm2, %xmm4 -; SSSE3-NEXT: psrlw $4, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pshufb %xmm0, %xmm3 -; SSSE3-NEXT: paddb %xmm4, %xmm3 -; SSSE3-NEXT: pxor %xmm0, %xmm0 -; SSSE3-NEXT: psadbw %xmm3, %xmm0 -; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] -; SSSE3-NEXT: movdqa %xmm1, %xmm2 -; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm3, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 +; SSSE3-NEXT: paddq %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm2 +; SSSE3-NEXT: pcmpeqd %xmm1, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2] +; SSSE3-NEXT: pand %xmm2, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_4_v2i64: +; SSE41-LABEL: ult_2_v2i64: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; SSE41-NEXT: movdqa %xmm3, %xmm4 -; SSE41-NEXT: pshufb %xmm2, %xmm4 -; SSE41-NEXT: psrlw $4, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: paddq %xmm0, %xmm1 ; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: paddb %xmm4, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: psadbw %xmm3, %xmm0 -; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] -; SSE41-NEXT: movdqa %xmm1, %xmm2 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: pand %xmm3, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_4_v2i64: +; AVX1-LABEL: ult_2_v2i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_4_v2i64: +; AVX2-LABEL: 
ult_2_v2i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] -; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_4_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_2_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_4_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_2_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -21348,34 +17171,31 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_4_v2i64: +; BITALG_NOVLX-LABEL: ult_2_v2i64: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 +; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] -; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; BITALG_NOVLX-NEXT: vzeroupper +; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_4_v2i64: +; BITALG-LABEL: ult_2_v2i64: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %xmm0, %xmm0 +; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1 +; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 -; BITALG-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_4_v2i64: +define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_2_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -21394,7 +17214,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; 
SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21405,7 +17225,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_4_v2i64: +; SSE3-LABEL: ugt_2_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -21424,7 +17244,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21435,7 +17255,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_4_v2i64: +; SSSE3-LABEL: ugt_2_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -21450,7 +17270,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21461,7 +17281,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_4_v2i64: +; SSE41-LABEL: ugt_2_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -21476,7 +17296,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483650,2147483650] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21487,7 +17307,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_4_v2i64: +; AVX1-LABEL: ugt_2_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21502,7 +17322,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_4_v2i64: +; AVX2-LABEL: ugt_2_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21517,7 +17337,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_4_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_2_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -21525,7 +17345,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_4_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_2_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, 
%k1 @@ -21533,7 +17353,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_4_v2i64: +; BITALG_NOVLX-LABEL: ugt_2_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -21543,7 +17363,7 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_4_v2i64: +; BITALG-LABEL: ugt_2_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21553,13 +17373,13 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_5_v2i64: +define <2 x i64> @ult_3_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_3_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -21578,7 +17398,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21589,7 +17409,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_5_v2i64: +; SSE3-LABEL: ult_3_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -21608,7 +17428,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21619,7 +17439,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_5_v2i64: +; SSSE3-LABEL: ult_3_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -21634,7 +17454,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21645,7 +17465,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_5_v2i64: +; SSE41-LABEL: ult_3_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -21660,7 +17480,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[2147483653,2147483653] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21671,7 +17491,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_5_v2i64: +; AVX1-LABEL: ult_3_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21683,11 +17503,11 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_5_v2i64: +; AVX2-LABEL: ult_3_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21699,20 +17519,20 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_5_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_3_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_5_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_3_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -21720,18 +17540,18 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_5_v2i64: +; BITALG_NOVLX-LABEL: ult_3_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_5_v2i64: +; BITALG-LABEL: ult_3_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21741,13 +17561,13 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_5_v2i64: +define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_3_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -21766,7 +17586,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; 
SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21777,7 +17597,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_5_v2i64: +; SSE3-LABEL: ugt_3_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -21796,7 +17616,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21807,7 +17627,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_5_v2i64: +; SSSE3-LABEL: ugt_3_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -21822,7 +17642,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21833,7 +17653,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_5_v2i64: +; SSE41-LABEL: ugt_3_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -21848,7 +17668,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483651,2147483651] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21859,7 +17679,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_5_v2i64: +; AVX1-LABEL: ugt_3_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21874,7 +17694,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_5_v2i64: +; AVX2-LABEL: ugt_3_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21889,7 +17709,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_5_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_3_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -21897,7 +17717,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: 
vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_5_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_3_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -21905,7 +17725,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_5_v2i64: +; BITALG_NOVLX-LABEL: ugt_3_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -21915,7 +17735,7 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_5_v2i64: +; BITALG-LABEL: ugt_3_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21925,13 +17745,13 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_6_v2i64: +define <2 x i64> @ult_4_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_4_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -21950,7 +17770,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21961,7 +17781,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_6_v2i64: +; SSE3-LABEL: ult_4_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -21980,7 +17800,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -21991,7 +17811,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_6_v2i64: +; SSSE3-LABEL: ult_4_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -22006,7 +17826,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22017,7 +17837,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_6_v2i64: +; SSE41-LABEL: ult_4_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -22032,7 +17852,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22043,7 +17863,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_6_v2i64: +; AVX1-LABEL: ult_4_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22055,11 +17875,11 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_6_v2i64: +; AVX2-LABEL: ult_4_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22071,20 +17891,20 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_6_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_4_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_6_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_4_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -22092,18 +17912,18 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_6_v2i64: +; BITALG_NOVLX-LABEL: ult_4_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_6_v2i64: +; BITALG-LABEL: ult_4_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -22113,13 +17933,13 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_6_v2i64: +define <2 x 
i64> @ugt_4_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_4_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -22138,7 +17958,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22149,7 +17969,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_6_v2i64: +; SSE3-LABEL: ugt_4_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -22168,7 +17988,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22179,7 +17999,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_6_v2i64: +; SSSE3-LABEL: ugt_4_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -22194,7 +18014,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22205,7 +18025,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_6_v2i64: +; SSE41-LABEL: ugt_4_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -22220,7 +18040,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483652,2147483652] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22231,7 +18051,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_6_v2i64: +; AVX1-LABEL: ugt_4_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22246,7 +18066,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_6_v2i64: +; AVX2-LABEL: ugt_4_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22261,7 +18081,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_6_v2i64: +; AVX512VPOPCNTDQ-LABEL: 
ugt_4_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -22269,7 +18089,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_6_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_4_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -22277,7 +18097,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_6_v2i64: +; BITALG_NOVLX-LABEL: ugt_4_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -22287,7 +18107,7 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_6_v2i64: +; BITALG-LABEL: ugt_4_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -22297,13 +18117,13 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_7_v2i64: +define <2 x i64> @ult_5_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_5_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -22322,7 +18142,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22333,7 +18153,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_7_v2i64: +; SSE3-LABEL: ult_5_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -22352,7 +18172,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22363,7 +18183,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_7_v2i64: +; SSSE3-LABEL: ult_5_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -22378,7 +18198,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22389,7 +18209,7 @@ define <2 x i64> 
@ult_7_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_7_v2i64: +; SSE41-LABEL: ult_5_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -22404,7 +18224,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22415,7 +18235,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_7_v2i64: +; AVX1-LABEL: ult_5_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22427,11 +18247,11 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_7_v2i64: +; AVX2-LABEL: ult_5_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22443,20 +18263,20 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_7_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_5_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_7_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_5_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -22464,18 +18284,18 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_7_v2i64: +; BITALG_NOVLX-LABEL: ult_5_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_7_v2i64: +; BITALG-LABEL: ult_5_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -22485,13 +18305,13 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call 
<2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_7_v2i64: +define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_5_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -22510,7 +18330,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22521,7 +18341,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_7_v2i64: +; SSE3-LABEL: ugt_5_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -22540,7 +18360,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22551,7 +18371,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_7_v2i64: +; SSSE3-LABEL: ugt_5_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -22566,7 +18386,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22577,7 +18397,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_7_v2i64: +; SSE41-LABEL: ugt_5_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -22592,7 +18412,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483653,2147483653] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22603,7 +18423,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_7_v2i64: +; AVX1-LABEL: ugt_5_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22618,7 +18438,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_7_v2i64: +; AVX2-LABEL: ugt_5_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22633,7 +18453,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_5_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -22641,7 +18461,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_7_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_5_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -22649,7 +18469,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_7_v2i64: +; BITALG_NOVLX-LABEL: ugt_5_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -22659,7 +18479,7 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_7_v2i64: +; BITALG-LABEL: ugt_5_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -22669,13 +18489,13 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_8_v2i64: +define <2 x i64> @ult_6_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_6_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -22694,7 +18514,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22705,7 +18525,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_8_v2i64: +; SSE3-LABEL: ult_6_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -22724,7 +18544,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22735,7 +18555,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_8_v2i64: +; SSSE3-LABEL: ult_6_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -22750,7 +18570,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[2147483656,2147483656] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22761,7 +18581,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_8_v2i64: +; SSE41-LABEL: ult_6_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -22776,7 +18596,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22787,7 +18607,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_8_v2i64: +; AVX1-LABEL: ult_6_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22799,11 +18619,11 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_8_v2i64: +; AVX2-LABEL: ult_6_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22815,20 +18635,20 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_6_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_8_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_6_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -22836,18 +18656,18 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_8_v2i64: +; BITALG_NOVLX-LABEL: ult_6_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_8_v2i64: +; BITALG-LABEL: ult_6_v2i64: ; 
BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -22857,13 +18677,13 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_8_v2i64: +define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_6_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -22882,7 +18702,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22893,7 +18713,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_8_v2i64: +; SSE3-LABEL: ugt_6_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -22912,7 +18732,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22923,7 +18743,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_8_v2i64: +; SSSE3-LABEL: ugt_6_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -22938,7 +18758,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22949,7 +18769,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_8_v2i64: +; SSE41-LABEL: ugt_6_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -22964,7 +18784,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483654,2147483654] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -22975,7 +18795,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_8_v2i64: +; AVX1-LABEL: ugt_6_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -22990,7 +18810,7 @@ define <2 x i64> 
@ugt_8_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_8_v2i64: +; AVX2-LABEL: ugt_6_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23005,7 +18825,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_6_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -23013,7 +18833,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_8_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_6_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -23021,7 +18841,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_8_v2i64: +; BITALG_NOVLX-LABEL: ugt_6_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -23031,7 +18851,7 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_8_v2i64: +; BITALG-LABEL: ugt_6_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -23041,13 +18861,13 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_9_v2i64: +define <2 x i64> @ult_7_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_7_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -23066,7 +18886,7 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23077,7 +18897,7 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_9_v2i64: +; SSE3-LABEL: ult_7_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -23096,7 +18916,7 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23107,7 +18927,7 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_9_v2i64: +; SSSE3-LABEL: ult_7_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -23122,7 +18942,7 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23133,7 +18953,7 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_9_v2i64: +; SSE41-LABEL: ult_7_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -23148,7 +18968,7 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23159,7 +18979,7 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_9_v2i64: +; AVX1-LABEL: ult_7_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23171,11 +18991,11 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_9_v2i64: +; AVX2-LABEL: ult_7_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23187,20 +19007,20 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_7_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_9_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_7_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -23208,18 +19028,18 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_9_v2i64: +; BITALG_NOVLX-LABEL: ult_7_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw 
%xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_9_v2i64: +; BITALG-LABEL: ult_7_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -23229,13 +19049,13 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_9_v2i64: +define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_7_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -23254,7 +19074,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23265,7 +19085,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_9_v2i64: +; SSE3-LABEL: ugt_7_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -23284,7 +19104,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23295,7 +19115,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_9_v2i64: +; SSSE3-LABEL: ugt_7_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -23310,7 +19130,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23321,7 +19141,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_9_v2i64: +; SSE41-LABEL: ugt_7_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -23336,7 +19156,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483655,2147483655] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23347,7 +19167,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; 
SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_9_v2i64: +; AVX1-LABEL: ugt_7_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23362,7 +19182,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_9_v2i64: +; AVX2-LABEL: ugt_7_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23377,7 +19197,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_9_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_7_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -23385,7 +19205,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_9_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_7_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -23393,7 +19213,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_9_v2i64: +; BITALG_NOVLX-LABEL: ugt_7_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -23403,7 +19223,7 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_9_v2i64: +; BITALG-LABEL: ugt_7_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -23413,13 +19233,13 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_10_v2i64: +define <2 x i64> @ult_8_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_8_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -23438,7 +19258,7 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23449,7 +19269,7 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_10_v2i64: +; SSE3-LABEL: ult_8_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -23468,7 +19288,7 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} 
xmm3 = xmm2[0,0,2,2] @@ -23479,7 +19299,7 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_10_v2i64: +; SSSE3-LABEL: ult_8_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -23494,7 +19314,7 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23505,7 +19325,7 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_10_v2i64: +; SSE41-LABEL: ult_8_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -23520,7 +19340,7 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23531,7 +19351,7 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_10_v2i64: +; AVX1-LABEL: ult_8_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23543,11 +19363,11 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_10_v2i64: +; AVX2-LABEL: ult_8_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23559,20 +19379,20 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_10_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_8_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_10_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_8_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -23580,18 +19400,18 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; 
BITALG_NOVLX-LABEL: ult_10_v2i64: +; BITALG_NOVLX-LABEL: ult_8_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_10_v2i64: +; BITALG-LABEL: ult_8_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -23601,13 +19421,13 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_10_v2i64: +define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_8_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -23626,7 +19446,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23637,7 +19457,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_10_v2i64: +; SSE3-LABEL: ugt_8_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -23656,7 +19476,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23667,7 +19487,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_10_v2i64: +; SSSE3-LABEL: ugt_8_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -23682,7 +19502,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23693,7 +19513,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_10_v2i64: +; SSE41-LABEL: ugt_8_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -23708,7 +19528,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483658,2147483658] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483656,2147483656] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23719,7 +19539,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_10_v2i64: +; AVX1-LABEL: ugt_8_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23734,7 +19554,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_10_v2i64: +; AVX2-LABEL: ugt_8_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23749,7 +19569,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_10_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_8_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -23757,7 +19577,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_10_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_8_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -23765,7 +19585,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_10_v2i64: +; BITALG_NOVLX-LABEL: ugt_8_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -23775,7 +19595,7 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_10_v2i64: +; BITALG-LABEL: ugt_8_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -23785,13 +19605,13 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_11_v2i64: +define <2 x i64> @ult_9_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_9_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -23810,7 +19630,7 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23821,7 +19641,7 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_11_v2i64: +; SSE3-LABEL: ult_9_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -23840,7 +19660,7 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; SSE3-NEXT: 
pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23851,7 +19671,7 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_11_v2i64: +; SSSE3-LABEL: ult_9_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -23866,7 +19686,7 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23877,7 +19697,7 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_11_v2i64: +; SSE41-LABEL: ult_9_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -23892,7 +19712,7 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -23903,7 +19723,7 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_11_v2i64: +; AVX1-LABEL: ult_9_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23915,11 +19735,11 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_11_v2i64: +; AVX2-LABEL: ult_9_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -23931,20 +19751,20 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_11_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_9_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_11_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_9_v2i64: ; 
AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -23952,18 +19772,18 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_11_v2i64: +; BITALG_NOVLX-LABEL: ult_9_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_11_v2i64: +; BITALG-LABEL: ult_9_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -23973,13 +19793,13 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_11_v2i64: +define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_9_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -23998,7 +19818,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24009,7 +19829,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_11_v2i64: +; SSE3-LABEL: ugt_9_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -24028,7 +19848,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24039,7 +19859,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_11_v2i64: +; SSSE3-LABEL: ugt_9_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -24054,7 +19874,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24065,7 +19885,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_11_v2i64: +; SSE41-LABEL: ugt_9_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: 
movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -24080,7 +19900,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483657,2147483657] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24091,7 +19911,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_11_v2i64: +; AVX1-LABEL: ugt_9_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24106,7 +19926,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_11_v2i64: +; AVX2-LABEL: ugt_9_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24121,7 +19941,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_11_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_9_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -24129,7 +19949,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_11_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_9_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -24137,7 +19957,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_11_v2i64: +; BITALG_NOVLX-LABEL: ugt_9_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -24147,7 +19967,7 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_11_v2i64: +; BITALG-LABEL: ugt_9_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -24157,13 +19977,13 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_12_v2i64: +define <2 x i64> @ult_10_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_10_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -24182,7 +20002,7 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24193,7 +20013,7 @@ define <2 x 
i64> @ult_12_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_12_v2i64: +; SSE3-LABEL: ult_10_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -24212,7 +20032,7 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24223,7 +20043,7 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_12_v2i64: +; SSSE3-LABEL: ult_10_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -24238,7 +20058,7 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24249,7 +20069,7 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_12_v2i64: +; SSE41-LABEL: ult_10_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -24264,7 +20084,7 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24275,7 +20095,7 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_12_v2i64: +; AVX1-LABEL: ult_10_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24287,11 +20107,11 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_12_v2i64: +; AVX2-LABEL: ult_10_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24303,20 +20123,20 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_12_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_10_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; 
AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_12_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_10_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -24324,18 +20144,18 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_12_v2i64: +; BITALG_NOVLX-LABEL: ult_10_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_12_v2i64: +; BITALG-LABEL: ult_10_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -24345,13 +20165,13 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_12_v2i64: +define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_10_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -24370,7 +20190,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24381,7 +20201,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_12_v2i64: +; SSE3-LABEL: ugt_10_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -24400,7 +20220,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24411,7 +20231,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_12_v2i64: +; SSSE3-LABEL: ugt_10_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -24426,7 +20246,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] ; SSSE3-NEXT: 
movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24437,7 +20257,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_12_v2i64: +; SSE41-LABEL: ugt_10_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -24452,7 +20272,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483658,2147483658] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24463,7 +20283,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_12_v2i64: +; AVX1-LABEL: ugt_10_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24478,7 +20298,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_12_v2i64: +; AVX2-LABEL: ugt_10_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24493,7 +20313,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_12_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_10_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -24501,7 +20321,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_12_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_10_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -24509,7 +20329,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_12_v2i64: +; BITALG_NOVLX-LABEL: ugt_10_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -24519,7 +20339,7 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_12_v2i64: +; BITALG-LABEL: ugt_10_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -24529,13 +20349,13 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_13_v2i64: +define <2 x i64> @ult_11_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_11_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -24554,7 +20374,7 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: 
psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24565,7 +20385,7 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_13_v2i64: +; SSE3-LABEL: ult_11_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -24584,7 +20404,7 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24595,7 +20415,7 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_13_v2i64: +; SSSE3-LABEL: ult_11_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -24610,7 +20430,7 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24621,7 +20441,7 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_13_v2i64: +; SSE41-LABEL: ult_11_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -24636,7 +20456,7 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24647,7 +20467,7 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_13_v2i64: +; AVX1-LABEL: ult_11_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24659,11 +20479,11 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_13_v2i64: +; AVX2-LABEL: ult_11_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24675,20 +20495,20 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[13,13] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_13_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_11_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_13_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_11_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -24696,18 +20516,18 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_13_v2i64: +; BITALG_NOVLX-LABEL: ult_11_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_13_v2i64: +; BITALG-LABEL: ult_11_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -24717,13 +20537,13 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_13_v2i64: +define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_11_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -24742,7 +20562,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24753,7 +20573,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_13_v2i64: +; SSE3-LABEL: ugt_11_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -24772,7 +20592,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24783,7 +20603,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_13_v2i64: +; SSSE3-LABEL: ugt_11_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -24798,7 +20618,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24809,7 +20629,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_13_v2i64: +; SSE41-LABEL: ugt_11_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -24824,7 +20644,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483659,2147483659] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24835,7 +20655,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_13_v2i64: +; AVX1-LABEL: ugt_11_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24850,7 +20670,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_13_v2i64: +; AVX2-LABEL: ugt_11_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -24865,7 +20685,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_13_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_11_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -24873,7 +20693,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_13_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_11_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -24881,7 +20701,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_13_v2i64: +; BITALG_NOVLX-LABEL: ugt_11_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -24891,7 +20711,7 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_13_v2i64: +; BITALG-LABEL: ugt_11_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -24901,13 +20721,13 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define 
<2 x i64> @ult_14_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_14_v2i64: +define <2 x i64> @ult_12_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_12_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -24926,7 +20746,7 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24937,7 +20757,7 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_14_v2i64: +; SSE3-LABEL: ult_12_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -24956,7 +20776,7 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24967,7 +20787,7 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_14_v2i64: +; SSSE3-LABEL: ult_12_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -24982,7 +20802,7 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -24993,7 +20813,7 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_14_v2i64: +; SSE41-LABEL: ult_12_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -25008,7 +20828,7 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25019,7 +20839,7 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_14_v2i64: +; AVX1-LABEL: ult_12_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25031,11 +20851,11 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_14_v2i64: +; AVX2-LABEL: ult_12_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa 
{{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25047,20 +20867,20 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_14_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_12_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_14_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_12_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -25068,18 +20888,18 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_14_v2i64: +; BITALG_NOVLX-LABEL: ult_12_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_14_v2i64: +; BITALG-LABEL: ult_12_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -25089,13 +20909,13 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_14_v2i64: +define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_12_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -25114,7 +20934,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25125,7 +20945,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_14_v2i64: +; SSE3-LABEL: ugt_12_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -25144,7 +20964,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; 
SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25155,7 +20975,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_14_v2i64: +; SSSE3-LABEL: ugt_12_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -25170,7 +20990,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25181,7 +21001,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_14_v2i64: +; SSE41-LABEL: ugt_12_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -25196,7 +21016,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483660,2147483660] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25207,7 +21027,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_14_v2i64: +; AVX1-LABEL: ugt_12_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25222,7 +21042,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_14_v2i64: +; AVX2-LABEL: ugt_12_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25237,7 +21057,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_14_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_12_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -25245,7 +21065,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_14_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_12_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -25253,7 +21073,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_14_v2i64: +; BITALG_NOVLX-LABEL: ugt_12_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -25263,7 +21083,7 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_14_v2i64: +; BITALG-LABEL: ugt_12_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 @@ -25273,13 +21093,13 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_15_v2i64: +define <2 x i64> @ult_13_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_13_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -25298,7 +21118,7 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25309,7 +21129,7 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_15_v2i64: +; SSE3-LABEL: ult_13_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -25328,7 +21148,7 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25339,7 +21159,7 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_15_v2i64: +; SSSE3-LABEL: ult_13_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -25354,7 +21174,7 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25365,7 +21185,7 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_15_v2i64: +; SSE41-LABEL: ult_13_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -25380,7 +21200,7 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25391,7 +21211,7 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_15_v2i64: +; AVX1-LABEL: ult_13_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25403,11 +21223,11 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, 
%xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_15_v2i64: +; AVX2-LABEL: ult_13_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25419,20 +21239,20 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_15_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_13_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_15_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_13_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -25440,18 +21260,18 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_15_v2i64: +; BITALG_NOVLX-LABEL: ult_13_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_15_v2i64: +; BITALG-LABEL: ult_13_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -25461,13 +21281,13 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_15_v2i64: +define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_13_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -25486,7 +21306,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25497,7 +21317,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_15_v2i64: +; SSE3-LABEL: ugt_13_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -25516,7 +21336,7 @@ 
define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25527,7 +21347,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_15_v2i64: +; SSSE3-LABEL: ugt_13_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -25542,7 +21362,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25553,7 +21373,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_15_v2i64: +; SSE41-LABEL: ugt_13_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -25568,7 +21388,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483661,2147483661] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25579,7 +21399,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_15_v2i64: +; AVX1-LABEL: ugt_13_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25594,7 +21414,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_15_v2i64: +; AVX2-LABEL: ugt_13_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25609,7 +21429,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_15_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_13_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -25617,7 +21437,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_15_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_13_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -25625,7 +21445,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_15_v2i64: +; BITALG_NOVLX-LABEL: ugt_13_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 
killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -25635,7 +21455,7 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_15_v2i64: +; BITALG-LABEL: ugt_13_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -25645,13 +21465,13 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_16_v2i64: +define <2 x i64> @ult_14_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_14_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -25670,7 +21490,7 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25681,7 +21501,7 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_16_v2i64: +; SSE3-LABEL: ult_14_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -25700,7 +21520,7 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25711,7 +21531,7 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_16_v2i64: +; SSSE3-LABEL: ult_14_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -25726,7 +21546,7 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25737,7 +21557,7 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_16_v2i64: +; SSE41-LABEL: ult_14_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -25752,7 +21572,7 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25763,7 +21583,7 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por 
%xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_16_v2i64: +; AVX1-LABEL: ult_14_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25775,11 +21595,11 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_16_v2i64: +; AVX2-LABEL: ult_14_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25791,20 +21611,20 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_16_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_14_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_16_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_14_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -25812,18 +21632,18 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_16_v2i64: +; BITALG_NOVLX-LABEL: ult_14_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_16_v2i64: +; BITALG-LABEL: ult_14_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -25833,13 +21653,13 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_16_v2i64: +define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_14_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -25858,7 +21678,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; 
SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25869,7 +21689,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_16_v2i64: +; SSE3-LABEL: ugt_14_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -25888,7 +21708,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25899,7 +21719,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_16_v2i64: +; SSSE3-LABEL: ugt_14_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -25914,7 +21734,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25925,7 +21745,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_16_v2i64: +; SSE41-LABEL: ugt_14_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -25940,7 +21760,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483662,2147483662] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -25951,7 +21771,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_16_v2i64: +; AVX1-LABEL: ugt_14_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25966,7 +21786,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_16_v2i64: +; AVX2-LABEL: ugt_14_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -25981,7 +21801,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_16_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_14_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -25989,7 +21809,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_16_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_14_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: 
vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -25997,7 +21817,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_16_v2i64: +; BITALG_NOVLX-LABEL: ugt_14_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -26007,7 +21827,7 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_16_v2i64: +; BITALG-LABEL: ugt_14_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -26017,13 +21837,13 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_17_v2i64: +define <2 x i64> @ult_15_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_15_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -26042,7 +21862,7 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26053,7 +21873,7 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_17_v2i64: +; SSE3-LABEL: ult_15_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -26072,7 +21892,7 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26083,7 +21903,7 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_17_v2i64: +; SSSE3-LABEL: ult_15_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -26098,7 +21918,7 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26109,7 +21929,7 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_17_v2i64: +; SSE41-LABEL: ult_15_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -26124,7 +21944,7 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26135,7 +21955,7 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_17_v2i64: +; AVX1-LABEL: ult_15_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26147,11 +21967,11 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_17_v2i64: +; AVX2-LABEL: ult_15_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26163,20 +21983,20 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_17_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_15_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_17_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_15_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -26184,18 +22004,18 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_17_v2i64: +; BITALG_NOVLX-LABEL: ult_15_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_17_v2i64: +; BITALG-LABEL: ult_15_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -26205,13 +22025,13 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_17_v2i64: +define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_15_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -26230,7 +22050,7 @@ 
define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26241,7 +22061,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_17_v2i64: +; SSE3-LABEL: ugt_15_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -26260,7 +22080,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26271,7 +22091,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_17_v2i64: +; SSSE3-LABEL: ugt_15_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -26286,7 +22106,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26297,7 +22117,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_17_v2i64: +; SSE41-LABEL: ugt_15_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -26312,7 +22132,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483663,2147483663] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26323,7 +22143,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_17_v2i64: +; AVX1-LABEL: ugt_15_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26338,7 +22158,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_17_v2i64: +; AVX2-LABEL: ugt_15_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26353,7 +22173,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_17_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_15_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, 
%zmm0 @@ -26361,7 +22181,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_17_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_15_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -26369,7 +22189,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_17_v2i64: +; BITALG_NOVLX-LABEL: ugt_15_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -26379,7 +22199,7 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_17_v2i64: +; BITALG-LABEL: ugt_15_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -26389,13 +22209,13 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_18_v2i64: +define <2 x i64> @ult_16_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_16_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -26414,7 +22234,7 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26425,7 +22245,7 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_18_v2i64: +; SSE3-LABEL: ult_16_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -26444,7 +22264,7 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26455,7 +22275,7 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_18_v2i64: +; SSSE3-LABEL: ult_16_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -26470,7 +22290,7 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26481,7 +22301,7 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_18_v2i64: +; 
SSE41-LABEL: ult_16_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -26496,7 +22316,7 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26507,7 +22327,7 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_18_v2i64: +; AVX1-LABEL: ult_16_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26519,11 +22339,11 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_18_v2i64: +; AVX2-LABEL: ult_16_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26535,20 +22355,20 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_18_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_16_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_18_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_16_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -26556,18 +22376,18 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_18_v2i64: +; BITALG_NOVLX-LABEL: ult_16_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_18_v2i64: +; BITALG-LABEL: ult_16_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -26577,13 +22397,13 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 
= icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_18_v2i64: +define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_16_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -26602,7 +22422,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26613,7 +22433,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_18_v2i64: +; SSE3-LABEL: ugt_16_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -26632,7 +22452,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26643,7 +22463,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_18_v2i64: +; SSSE3-LABEL: ugt_16_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -26658,7 +22478,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26669,7 +22489,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_18_v2i64: +; SSE41-LABEL: ugt_16_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -26684,7 +22504,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483664,2147483664] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26695,7 +22515,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_18_v2i64: +; AVX1-LABEL: ugt_16_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26710,7 +22530,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_18_v2i64: +; AVX2-LABEL: ugt_16_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26725,7 
+22545,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_18_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_16_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -26733,7 +22553,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_18_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_16_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -26741,7 +22561,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_18_v2i64: +; BITALG_NOVLX-LABEL: ugt_16_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -26751,7 +22571,7 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_18_v2i64: +; BITALG-LABEL: ugt_16_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -26761,13 +22581,13 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_19_v2i64: +define <2 x i64> @ult_17_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_17_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -26786,7 +22606,7 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26797,7 +22617,7 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_19_v2i64: +; SSE3-LABEL: ult_17_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -26816,7 +22636,7 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26827,7 +22647,7 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_19_v2i64: +; SSSE3-LABEL: ult_17_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -26842,7 +22662,7 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] +; 
SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26853,7 +22673,7 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_19_v2i64: +; SSE41-LABEL: ult_17_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -26868,7 +22688,7 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26879,7 +22699,7 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_19_v2i64: +; AVX1-LABEL: ult_17_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26891,11 +22711,11 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_19_v2i64: +; AVX2-LABEL: ult_17_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -26907,20 +22727,20 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_19_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_17_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_19_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_17_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -26928,18 +22748,18 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_19_v2i64: +; BITALG_NOVLX-LABEL: ult_17_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_19_v2i64: +; BITALG-LABEL: 
ult_17_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -26949,13 +22769,13 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_19_v2i64: +define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_17_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -26974,7 +22794,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -26985,7 +22805,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_19_v2i64: +; SSE3-LABEL: ugt_17_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -27004,7 +22824,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27015,7 +22835,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_19_v2i64: +; SSSE3-LABEL: ugt_17_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -27030,7 +22850,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27041,7 +22861,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_19_v2i64: +; SSE41-LABEL: ugt_17_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -27056,7 +22876,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483665,2147483665] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27067,7 +22887,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_19_v2i64: +; AVX1-LABEL: ugt_17_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27082,7 
+22902,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_19_v2i64: +; AVX2-LABEL: ugt_17_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27097,7 +22917,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_19_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_17_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -27105,7 +22925,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_19_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_17_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -27113,7 +22933,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_19_v2i64: +; BITALG_NOVLX-LABEL: ugt_17_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -27123,7 +22943,7 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_19_v2i64: +; BITALG-LABEL: ugt_17_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -27133,13 +22953,13 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_20_v2i64: +define <2 x i64> @ult_18_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_18_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -27158,7 +22978,7 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27169,7 +22989,7 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_20_v2i64: +; SSE3-LABEL: ult_18_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -27188,7 +23008,7 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27199,7 +23019,7 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_20_v2i64: +; SSSE3-LABEL: ult_18_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -27214,7 +23034,7 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27225,7 +23045,7 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_20_v2i64: +; SSE41-LABEL: ult_18_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -27240,7 +23060,7 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27251,7 +23071,7 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_20_v2i64: +; AVX1-LABEL: ult_18_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27263,11 +23083,11 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_20_v2i64: +; AVX2-LABEL: ult_18_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27279,20 +23099,20 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_20_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_18_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_20_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_18_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -27300,18 +23120,18 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_20_v2i64: +; BITALG_NOVLX-LABEL: ult_18_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_20_v2i64: +; BITALG-LABEL: ult_18_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -27321,13 +23141,13 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_20_v2i64: +define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_18_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -27346,7 +23166,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27357,7 +23177,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_20_v2i64: +; SSE3-LABEL: ugt_18_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -27376,7 +23196,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27387,7 +23207,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_20_v2i64: +; SSSE3-LABEL: ugt_18_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -27402,7 +23222,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27413,7 +23233,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_20_v2i64: +; SSE41-LABEL: ugt_18_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -27428,7 +23248,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483666,2147483666] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ 
-27439,7 +23259,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_20_v2i64: +; AVX1-LABEL: ugt_18_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27454,7 +23274,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_20_v2i64: +; AVX2-LABEL: ugt_18_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27469,7 +23289,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_20_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_18_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -27477,7 +23297,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_20_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_18_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -27485,7 +23305,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_20_v2i64: +; BITALG_NOVLX-LABEL: ugt_18_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -27495,7 +23315,7 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_20_v2i64: +; BITALG-LABEL: ugt_18_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -27505,13 +23325,13 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_21_v2i64: +define <2 x i64> @ult_19_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_19_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -27530,7 +23350,7 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27541,7 +23361,7 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_21_v2i64: +; SSE3-LABEL: ult_19_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -27560,7 +23380,7 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] ; 
SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27571,7 +23391,7 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_21_v2i64: +; SSSE3-LABEL: ult_19_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -27586,7 +23406,7 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27597,7 +23417,7 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_21_v2i64: +; SSE41-LABEL: ult_19_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -27612,7 +23432,7 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27623,7 +23443,7 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_21_v2i64: +; AVX1-LABEL: ult_19_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27635,11 +23455,11 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_21_v2i64: +; AVX2-LABEL: ult_19_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27651,20 +23471,20 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_21_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_19_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_21_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_19_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -27672,18 +23492,18 @@ define <2 x i64> @ult_21_v2i64(<2 x 
i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_21_v2i64: +; BITALG_NOVLX-LABEL: ult_19_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_21_v2i64: +; BITALG-LABEL: ult_19_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -27693,13 +23513,13 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_21_v2i64: +define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_19_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -27718,7 +23538,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27729,7 +23549,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_21_v2i64: +; SSE3-LABEL: ugt_19_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -27748,7 +23568,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27759,7 +23579,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_21_v2i64: +; SSSE3-LABEL: ugt_19_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -27774,7 +23594,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27785,7 +23605,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_21_v2i64: +; SSE41-LABEL: ugt_19_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -27800,7 +23620,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; SSE41-NEXT: 
pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483667,2147483667] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27811,7 +23631,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_21_v2i64: +; AVX1-LABEL: ugt_19_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27826,7 +23646,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_21_v2i64: +; AVX2-LABEL: ugt_19_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -27841,7 +23661,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_21_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_19_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -27849,7 +23669,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_21_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_19_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -27857,7 +23677,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_21_v2i64: +; BITALG_NOVLX-LABEL: ugt_19_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -27867,7 +23687,7 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_21_v2i64: +; BITALG-LABEL: ugt_19_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -27877,13 +23697,13 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_22_v2i64: +define <2 x i64> @ult_20_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_20_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -27902,7 +23722,7 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27913,7 +23733,7 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_22_v2i64: +; SSE3-LABEL: ult_20_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, 
%xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -27932,7 +23752,7 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27943,7 +23763,7 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_22_v2i64: +; SSSE3-LABEL: ult_20_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -27958,7 +23778,7 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27969,7 +23789,7 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_22_v2i64: +; SSE41-LABEL: ult_20_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -27984,7 +23804,7 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -27995,7 +23815,7 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_22_v2i64: +; AVX1-LABEL: ult_20_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28007,11 +23827,11 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_22_v2i64: +; AVX2-LABEL: ult_20_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28023,20 +23843,20 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_22_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_20_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: 
vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_22_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_20_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -28044,18 +23864,18 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_22_v2i64: +; BITALG_NOVLX-LABEL: ult_20_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_22_v2i64: +; BITALG-LABEL: ult_20_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -28065,13 +23885,13 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_22_v2i64: +define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_20_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -28090,7 +23910,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28101,7 +23921,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_22_v2i64: +; SSE3-LABEL: ugt_20_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -28120,7 +23940,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28131,7 +23951,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_22_v2i64: +; SSSE3-LABEL: ugt_20_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -28146,7 +23966,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28157,7 +23977,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; 
SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_22_v2i64: +; SSE41-LABEL: ugt_20_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -28172,7 +23992,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483668,2147483668] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28183,7 +24003,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_22_v2i64: +; AVX1-LABEL: ugt_20_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28198,7 +24018,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_22_v2i64: +; AVX2-LABEL: ugt_20_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28213,7 +24033,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_22_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_20_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -28221,7 +24041,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_22_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_20_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -28229,7 +24049,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_22_v2i64: +; BITALG_NOVLX-LABEL: ugt_20_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -28239,7 +24059,7 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_22_v2i64: +; BITALG-LABEL: ugt_20_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -28249,13 +24069,13 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_23_v2i64: +define <2 x i64> @ult_21_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_21_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -28274,7 +24094,7 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] ; 
SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28285,7 +24105,7 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_23_v2i64: +; SSE3-LABEL: ult_21_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -28304,7 +24124,7 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28315,7 +24135,7 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_23_v2i64: +; SSSE3-LABEL: ult_21_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -28330,7 +24150,7 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28341,7 +24161,7 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_23_v2i64: +; SSE41-LABEL: ult_21_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -28356,7 +24176,7 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28367,7 +24187,7 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_23_v2i64: +; AVX1-LABEL: ult_21_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28379,11 +24199,11 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_23_v2i64: +; AVX2-LABEL: ult_21_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28395,20 +24215,20 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_23_v2i64: +; AVX512VPOPCNTDQ-LABEL: 
ult_21_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_23_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_21_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -28416,18 +24236,18 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_23_v2i64: +; BITALG_NOVLX-LABEL: ult_21_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_23_v2i64: +; BITALG-LABEL: ult_21_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -28437,13 +24257,13 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_23_v2i64: +define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_21_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -28462,7 +24282,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28473,7 +24293,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_23_v2i64: +; SSE3-LABEL: ugt_21_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -28492,7 +24312,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28503,7 +24323,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_23_v2i64: +; SSSE3-LABEL: ugt_21_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -28518,7 +24338,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por 
{{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28529,7 +24349,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_23_v2i64: +; SSE41-LABEL: ugt_21_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -28544,7 +24364,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483669,2147483669] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28555,7 +24375,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_23_v2i64: +; AVX1-LABEL: ugt_21_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28570,7 +24390,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_23_v2i64: +; AVX2-LABEL: ugt_21_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28585,7 +24405,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_23_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_21_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -28593,7 +24413,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_23_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_21_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -28601,7 +24421,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_23_v2i64: +; BITALG_NOVLX-LABEL: ugt_21_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -28611,7 +24431,7 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_23_v2i64: +; BITALG-LABEL: ugt_21_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -28621,13 +24441,13 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_24_v2i64: +define <2 x i64> @ult_22_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_22_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa 
%xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -28646,7 +24466,7 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28657,7 +24477,7 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_24_v2i64: +; SSE3-LABEL: ult_22_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -28676,7 +24496,7 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28687,7 +24507,7 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_24_v2i64: +; SSSE3-LABEL: ult_22_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -28702,7 +24522,7 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28713,7 +24533,7 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_24_v2i64: +; SSE41-LABEL: ult_22_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -28728,7 +24548,7 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28739,7 +24559,7 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_24_v2i64: +; AVX1-LABEL: ult_22_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28751,11 +24571,11 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_24_v2i64: +; AVX2-LABEL: ult_22_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28767,20 +24587,20 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; 
AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_24_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_22_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_24_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_22_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -28788,18 +24608,18 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_24_v2i64: +; BITALG_NOVLX-LABEL: ult_22_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_24_v2i64: +; BITALG-LABEL: ult_22_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -28809,13 +24629,13 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_24_v2i64: +define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_22_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -28834,7 +24654,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28845,7 +24665,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_24_v2i64: +; SSE3-LABEL: ugt_22_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -28864,7 +24684,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28875,7 +24695,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: 
ugt_24_v2i64: +; SSSE3-LABEL: ugt_22_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -28890,7 +24710,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28901,7 +24721,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_24_v2i64: +; SSE41-LABEL: ugt_22_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -28916,7 +24736,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483670,2147483670] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -28927,7 +24747,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_24_v2i64: +; AVX1-LABEL: ugt_22_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28942,7 +24762,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_24_v2i64: +; AVX2-LABEL: ugt_22_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -28957,7 +24777,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_24_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_22_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -28965,7 +24785,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_24_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_22_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -28973,7 +24793,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_24_v2i64: +; BITALG_NOVLX-LABEL: ugt_22_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -28983,7 +24803,7 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_24_v2i64: +; BITALG-LABEL: ugt_22_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -28993,13 +24813,13 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> 
@llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_25_v2i64: +define <2 x i64> @ult_23_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_23_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -29018,7 +24838,7 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29029,7 +24849,7 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_25_v2i64: +; SSE3-LABEL: ult_23_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -29048,7 +24868,7 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29059,7 +24879,7 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_25_v2i64: +; SSSE3-LABEL: ult_23_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -29074,7 +24894,7 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29085,7 +24905,7 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_25_v2i64: +; SSE41-LABEL: ult_23_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -29100,7 +24920,7 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29111,7 +24931,7 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_25_v2i64: +; AVX1-LABEL: ult_23_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29123,11 +24943,11 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] ; 
AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_25_v2i64: +; AVX2-LABEL: ult_23_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29139,20 +24959,20 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_25_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_23_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_25_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_23_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -29160,18 +24980,18 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_25_v2i64: +; BITALG_NOVLX-LABEL: ult_23_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_25_v2i64: +; BITALG-LABEL: ult_23_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -29181,13 +25001,13 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_25_v2i64: +define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_23_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -29206,7 +25026,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29217,7 +25037,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_25_v2i64: +; SSE3-LABEL: ugt_23_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -29236,7 +25056,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[2147483673,2147483673] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29247,7 +25067,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_25_v2i64: +; SSSE3-LABEL: ugt_23_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -29262,7 +25082,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29273,7 +25093,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_25_v2i64: +; SSE41-LABEL: ugt_23_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -29288,7 +25108,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483671,2147483671] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29299,7 +25119,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_25_v2i64: +; AVX1-LABEL: ugt_23_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29314,7 +25134,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_25_v2i64: +; AVX2-LABEL: ugt_23_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29329,7 +25149,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_25_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_23_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -29337,7 +25157,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_25_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_23_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -29345,7 +25165,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_25_v2i64: +; BITALG_NOVLX-LABEL: ugt_23_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -29355,7 +25175,7 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; 
BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_25_v2i64: +; BITALG-LABEL: ugt_23_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -29365,13 +25185,13 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_26_v2i64: +define <2 x i64> @ult_24_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_24_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -29390,7 +25210,7 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29401,7 +25221,7 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_26_v2i64: +; SSE3-LABEL: ult_24_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -29420,7 +25240,7 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29431,7 +25251,7 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_26_v2i64: +; SSSE3-LABEL: ult_24_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -29446,7 +25266,7 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29457,7 +25277,7 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_26_v2i64: +; SSE41-LABEL: ult_24_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -29472,7 +25292,7 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29483,7 +25303,7 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_26_v2i64: +; AVX1-LABEL: ult_24_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29495,11 +25315,11 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_26_v2i64: +; AVX2-LABEL: ult_24_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29511,20 +25331,20 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_26_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_24_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_26_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_24_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -29532,18 +25352,18 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_26_v2i64: +; BITALG_NOVLX-LABEL: ult_24_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_26_v2i64: +; BITALG-LABEL: ult_24_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -29553,13 +25373,13 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_26_v2i64: +define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_24_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -29578,7 +25398,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29589,7 +25409,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, 
%xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_26_v2i64: +; SSE3-LABEL: ugt_24_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -29608,7 +25428,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29619,7 +25439,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_26_v2i64: +; SSSE3-LABEL: ugt_24_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -29634,7 +25454,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29645,7 +25465,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_26_v2i64: +; SSE41-LABEL: ugt_24_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -29660,7 +25480,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483672,2147483672] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29671,7 +25491,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_26_v2i64: +; AVX1-LABEL: ugt_24_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29686,7 +25506,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_26_v2i64: +; AVX2-LABEL: ugt_24_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29701,7 +25521,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_26_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_24_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -29709,7 +25529,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_26_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_24_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -29717,7 +25537,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, 
%xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_26_v2i64: +; BITALG_NOVLX-LABEL: ugt_24_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -29727,7 +25547,7 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_26_v2i64: +; BITALG-LABEL: ugt_24_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -29737,13 +25557,13 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_27_v2i64: +define <2 x i64> @ult_25_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_25_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -29762,7 +25582,7 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29773,7 +25593,7 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_27_v2i64: +; SSE3-LABEL: ult_25_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -29792,7 +25612,7 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29803,7 +25623,7 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_27_v2i64: +; SSSE3-LABEL: ult_25_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -29818,7 +25638,7 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29829,7 +25649,7 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_27_v2i64: +; SSE41-LABEL: ult_25_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -29844,7 +25664,7 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] ; SSE41-NEXT: movdqa 
%xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29855,7 +25675,7 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_27_v2i64: +; AVX1-LABEL: ult_25_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29867,11 +25687,11 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_27_v2i64: +; AVX2-LABEL: ult_25_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -29883,20 +25703,20 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_27_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_25_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_27_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_25_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -29904,18 +25724,18 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_27_v2i64: +; BITALG_NOVLX-LABEL: ult_25_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_27_v2i64: +; BITALG-LABEL: ult_25_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -29925,13 +25745,13 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_27_v2i64: +define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_25_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -29950,7 +25770,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; 
SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29961,7 +25781,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_27_v2i64: +; SSE3-LABEL: ugt_25_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -29980,7 +25800,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -29991,7 +25811,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_27_v2i64: +; SSSE3-LABEL: ugt_25_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -30006,7 +25826,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30017,7 +25837,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_27_v2i64: +; SSE41-LABEL: ugt_25_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -30032,7 +25852,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483673,2147483673] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30043,7 +25863,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_27_v2i64: +; AVX1-LABEL: ugt_25_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30058,7 +25878,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_27_v2i64: +; AVX2-LABEL: ugt_25_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30073,7 +25893,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_27_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_25_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -30081,7 +25901,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; 
AVX512VPOPCNTDQVL-LABEL: ugt_27_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_25_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -30089,7 +25909,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_27_v2i64: +; BITALG_NOVLX-LABEL: ugt_25_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -30099,7 +25919,7 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_27_v2i64: +; BITALG-LABEL: ugt_25_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -30109,13 +25929,13 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_28_v2i64: +define <2 x i64> @ult_26_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_26_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -30134,7 +25954,7 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30145,7 +25965,7 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_28_v2i64: +; SSE3-LABEL: ult_26_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -30164,7 +25984,7 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30175,7 +25995,7 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_28_v2i64: +; SSSE3-LABEL: ult_26_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -30190,7 +26010,7 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30201,7 +26021,7 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_28_v2i64: +; SSE41-LABEL: ult_26_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa 
%xmm0, %xmm2 @@ -30216,7 +26036,7 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30227,7 +26047,7 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_28_v2i64: +; AVX1-LABEL: ult_26_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30239,11 +26059,11 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_28_v2i64: +; AVX2-LABEL: ult_26_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30255,20 +26075,20 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_28_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_26_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_28_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_26_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -30276,18 +26096,18 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_28_v2i64: +; BITALG_NOVLX-LABEL: ult_26_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_28_v2i64: +; BITALG-LABEL: ult_26_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -30297,13 +26117,13 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { -; SSE2-LABEL: 
ugt_28_v2i64: +define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_26_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -30322,7 +26142,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30333,7 +26153,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_28_v2i64: +; SSE3-LABEL: ugt_26_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -30352,7 +26172,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30363,7 +26183,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_28_v2i64: +; SSSE3-LABEL: ugt_26_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -30378,7 +26198,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30389,7 +26209,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_28_v2i64: +; SSE41-LABEL: ugt_26_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -30404,7 +26224,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483674,2147483674] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30415,7 +26235,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_28_v2i64: +; AVX1-LABEL: ugt_26_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30430,7 +26250,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_28_v2i64: +; AVX2-LABEL: ugt_26_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30445,7 +26265,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; 
AVX512VPOPCNTDQ-LABEL: ugt_28_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_26_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -30453,7 +26273,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_28_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_26_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -30461,7 +26281,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_28_v2i64: +; BITALG_NOVLX-LABEL: ugt_26_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -30471,7 +26291,7 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_28_v2i64: +; BITALG-LABEL: ugt_26_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -30481,13 +26301,13 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_29_v2i64: +define <2 x i64> @ult_27_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_27_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -30506,7 +26326,7 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30517,7 +26337,7 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_29_v2i64: +; SSE3-LABEL: ult_27_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -30536,7 +26356,7 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30547,7 +26367,7 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_29_v2i64: +; SSSE3-LABEL: ult_27_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -30562,7 +26382,7 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; 
SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30573,7 +26393,7 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_29_v2i64: +; SSE41-LABEL: ult_27_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -30588,7 +26408,7 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30599,7 +26419,7 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_29_v2i64: +; AVX1-LABEL: ult_27_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30611,11 +26431,11 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_29_v2i64: +; AVX2-LABEL: ult_27_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30627,20 +26447,20 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_29_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_27_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_29_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_27_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -30648,18 +26468,18 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_29_v2i64: +; BITALG_NOVLX-LABEL: ult_27_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_29_v2i64: +; BITALG-LABEL: ult_27_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -30669,13 +26489,13 @@ 
define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_29_v2i64: +define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_27_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -30694,7 +26514,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30705,7 +26525,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_29_v2i64: +; SSE3-LABEL: ugt_27_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -30724,7 +26544,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30735,7 +26555,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_29_v2i64: +; SSSE3-LABEL: ugt_27_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -30750,7 +26570,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30761,7 +26581,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_29_v2i64: +; SSE41-LABEL: ugt_27_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -30776,7 +26596,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483675,2147483675] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30787,7 +26607,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_29_v2i64: +; AVX1-LABEL: ugt_27_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30802,7 +26622,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; 
AVX2-LABEL: ugt_29_v2i64: +; AVX2-LABEL: ugt_27_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30817,7 +26637,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_29_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_27_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -30825,7 +26645,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_29_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_27_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -30833,7 +26653,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_29_v2i64: +; BITALG_NOVLX-LABEL: ugt_27_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -30843,7 +26663,7 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_29_v2i64: +; BITALG-LABEL: ugt_27_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -30853,13 +26673,13 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_30_v2i64: +define <2 x i64> @ult_28_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_28_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -30878,7 +26698,7 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30889,7 +26709,7 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_30_v2i64: +; SSE3-LABEL: ult_28_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -30908,7 +26728,7 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30919,7 +26739,7 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_30_v2i64: +; SSSE3-LABEL: ult_28_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -30934,7 +26754,7 @@ define <2 
x i64> @ult_30_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30945,7 +26765,7 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_30_v2i64: +; SSE41-LABEL: ult_28_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -30960,7 +26780,7 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -30971,7 +26791,7 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_30_v2i64: +; AVX1-LABEL: ult_28_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30983,11 +26803,11 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_30_v2i64: +; AVX2-LABEL: ult_28_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -30999,20 +26819,20 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_30_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_28_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_30_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_28_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -31020,18 +26840,18 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_30_v2i64: +; BITALG_NOVLX-LABEL: ult_28_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; 
BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_30_v2i64: +; BITALG-LABEL: ult_28_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -31041,13 +26861,13 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_30_v2i64: +define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_28_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -31066,7 +26886,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31077,7 +26897,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_30_v2i64: +; SSE3-LABEL: ugt_28_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -31096,7 +26916,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31107,7 +26927,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_30_v2i64: +; SSSE3-LABEL: ugt_28_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -31122,7 +26942,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31133,7 +26953,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_30_v2i64: +; SSE41-LABEL: ugt_28_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -31148,7 +26968,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483676,2147483676] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31159,7 +26979,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; 
AVX1-LABEL: ugt_30_v2i64: +; AVX1-LABEL: ugt_28_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31174,7 +26994,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_30_v2i64: +; AVX2-LABEL: ugt_28_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31189,7 +27009,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_30_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_28_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -31197,7 +27017,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_30_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_28_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -31205,7 +27025,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_30_v2i64: +; BITALG_NOVLX-LABEL: ugt_28_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -31215,7 +27035,7 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_30_v2i64: +; BITALG-LABEL: ugt_28_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -31225,13 +27045,13 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_31_v2i64: +define <2 x i64> @ult_29_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_29_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -31250,7 +27070,7 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31261,7 +27081,7 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_31_v2i64: +; SSE3-LABEL: ult_29_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -31280,7 +27100,7 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31291,7 
+27111,7 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_31_v2i64: +; SSSE3-LABEL: ult_29_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -31306,7 +27126,7 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31317,7 +27137,7 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_31_v2i64: +; SSE41-LABEL: ult_29_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -31332,7 +27152,7 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31343,7 +27163,7 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_31_v2i64: +; AVX1-LABEL: ult_29_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31355,11 +27175,11 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_31_v2i64: +; AVX2-LABEL: ult_29_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31371,20 +27191,20 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_31_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_29_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_31_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_29_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -31392,18 +27212,18 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: 
ult_31_v2i64: +; BITALG_NOVLX-LABEL: ult_29_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_31_v2i64: +; BITALG-LABEL: ult_29_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -31413,13 +27233,13 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_31_v2i64: +define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_29_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -31438,7 +27258,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31449,7 +27269,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_31_v2i64: +; SSE3-LABEL: ugt_29_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -31468,7 +27288,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31479,7 +27299,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_31_v2i64: +; SSSE3-LABEL: ugt_29_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -31494,7 +27314,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31505,7 +27325,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_31_v2i64: +; SSE41-LABEL: ugt_29_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -31520,7 +27340,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[2147483679,2147483679] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483677,2147483677] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31531,7 +27351,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_31_v2i64: +; AVX1-LABEL: ugt_29_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31546,7 +27366,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_31_v2i64: +; AVX2-LABEL: ugt_29_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31561,7 +27381,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_31_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_29_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -31569,7 +27389,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_31_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_29_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -31577,7 +27397,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_31_v2i64: +; BITALG_NOVLX-LABEL: ugt_29_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -31587,7 +27407,7 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_31_v2i64: +; BITALG-LABEL: ugt_29_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -31597,13 +27417,13 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_32_v2i64: +define <2 x i64> @ult_30_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_30_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -31622,7 +27442,7 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31633,7 +27453,7 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_32_v2i64: +; SSE3-LABEL: ult_30_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -31652,7 +27472,7 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor 
%xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31663,7 +27483,7 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_32_v2i64: +; SSSE3-LABEL: ult_30_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -31678,7 +27498,7 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31689,7 +27509,7 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_32_v2i64: +; SSE41-LABEL: ult_30_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -31704,7 +27524,7 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31715,7 +27535,7 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_32_v2i64: +; AVX1-LABEL: ult_30_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31727,11 +27547,11 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_32_v2i64: +; AVX2-LABEL: ult_30_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31743,20 +27563,20 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_32_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_30_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_32_v2i64: +; AVX512VPOPCNTDQVL-LABEL: 
ult_30_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -31764,18 +27584,18 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_32_v2i64: +; BITALG_NOVLX-LABEL: ult_30_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_32_v2i64: +; BITALG-LABEL: ult_30_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -31785,13 +27605,13 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_32_v2i64: +define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_30_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -31810,7 +27630,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31821,7 +27641,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_32_v2i64: +; SSE3-LABEL: ugt_30_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -31840,7 +27660,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31851,7 +27671,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_32_v2i64: +; SSSE3-LABEL: ugt_30_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -31866,7 +27686,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31877,7 +27697,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_32_v2i64: +; SSE41-LABEL: ugt_30_v2i64: ; SSE41: # 
%bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -31892,7 +27712,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483678,2147483678] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -31903,7 +27723,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_32_v2i64: +; AVX1-LABEL: ugt_30_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31918,7 +27738,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_32_v2i64: +; AVX2-LABEL: ugt_30_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -31933,7 +27753,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_32_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_30_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -31941,7 +27761,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_32_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_30_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -31949,7 +27769,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_32_v2i64: +; BITALG_NOVLX-LABEL: ugt_30_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -31959,7 +27779,7 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_32_v2i64: +; BITALG-LABEL: ugt_30_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -31969,13 +27789,13 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_33_v2i64: +define <2 x i64> @ult_31_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_31_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -31994,7 +27814,7 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ 
-32005,7 +27825,7 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_33_v2i64: +; SSE3-LABEL: ult_31_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -32024,7 +27844,7 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32035,7 +27855,7 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_33_v2i64: +; SSSE3-LABEL: ult_31_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -32050,7 +27870,7 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32061,7 +27881,7 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_33_v2i64: +; SSE41-LABEL: ult_31_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -32076,7 +27896,7 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32087,7 +27907,7 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_33_v2i64: +; AVX1-LABEL: ult_31_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32099,11 +27919,11 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_33_v2i64: +; AVX2-LABEL: ult_31_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32115,20 +27935,20 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_33_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_31_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; 
AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_33_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_31_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -32136,18 +27956,18 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_33_v2i64: +; BITALG_NOVLX-LABEL: ult_31_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_33_v2i64: +; BITALG-LABEL: ult_31_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -32157,13 +27977,13 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_33_v2i64: +define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_31_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -32182,7 +28002,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32193,7 +28013,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_33_v2i64: +; SSE3-LABEL: ugt_31_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -32212,7 +28032,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32223,7 +28043,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_33_v2i64: +; SSSE3-LABEL: ugt_31_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -32238,7 +28058,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] +; SSSE3-NEXT: movdqa {{.*#+}} 
xmm1 = [2147483679,2147483679] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32249,7 +28069,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_33_v2i64: +; SSE41-LABEL: ugt_31_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -32264,7 +28084,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483679,2147483679] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32275,7 +28095,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_33_v2i64: +; AVX1-LABEL: ugt_31_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32290,7 +28110,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_33_v2i64: +; AVX2-LABEL: ugt_31_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32305,7 +28125,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_33_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_31_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -32313,7 +28133,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_33_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_31_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -32321,7 +28141,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_33_v2i64: +; BITALG_NOVLX-LABEL: ugt_31_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -32331,7 +28151,7 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_33_v2i64: +; BITALG-LABEL: ugt_31_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -32341,13 +28161,13 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_34_v2i64: +define <2 x i64> @ult_32_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_32_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -32366,7 +28186,7 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; 
SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32377,7 +28197,7 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_34_v2i64: +; SSE3-LABEL: ult_32_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -32396,7 +28216,7 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32407,7 +28227,7 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_34_v2i64: +; SSSE3-LABEL: ult_32_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -32422,7 +28242,7 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32433,7 +28253,7 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_34_v2i64: +; SSE41-LABEL: ult_32_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -32448,7 +28268,7 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32459,7 +28279,7 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_34_v2i64: +; AVX1-LABEL: ult_32_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32471,11 +28291,11 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_34_v2i64: +; AVX2-LABEL: ult_32_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32487,20 +28307,20 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; 
AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_34_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_32_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_34_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_32_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -32508,18 +28328,18 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_34_v2i64: +; BITALG_NOVLX-LABEL: ult_32_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_34_v2i64: +; BITALG-LABEL: ult_32_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -32529,13 +28349,13 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_34_v2i64: +define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_32_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -32554,7 +28374,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32565,7 +28385,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_34_v2i64: +; SSE3-LABEL: ugt_32_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -32584,7 +28404,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32595,7 +28415,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_34_v2i64: +; SSSE3-LABEL: ugt_32_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -32610,7 +28430,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32621,7 +28441,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_34_v2i64: +; SSE41-LABEL: ugt_32_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -32636,7 +28456,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483680,2147483680] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32647,7 +28467,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_34_v2i64: +; AVX1-LABEL: ugt_32_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32662,7 +28482,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_34_v2i64: +; AVX2-LABEL: ugt_32_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32677,7 +28497,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_34_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_32_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -32685,7 +28505,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_34_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_32_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -32693,7 +28513,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_34_v2i64: +; BITALG_NOVLX-LABEL: ugt_32_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -32703,7 +28523,7 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_34_v2i64: +; BITALG-LABEL: ugt_32_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -32713,13 +28533,13 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext 
<2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_35_v2i64: +define <2 x i64> @ult_33_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_33_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -32738,7 +28558,7 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32749,7 +28569,7 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_35_v2i64: +; SSE3-LABEL: ult_33_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -32768,7 +28588,7 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32779,7 +28599,7 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_35_v2i64: +; SSSE3-LABEL: ult_33_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -32794,7 +28614,7 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32805,7 +28625,7 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_35_v2i64: +; SSE41-LABEL: ult_33_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -32820,7 +28640,7 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32831,7 +28651,7 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_35_v2i64: +; AVX1-LABEL: ult_33_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32843,11 +28663,11 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_35_v2i64: +; AVX2-LABEL: 
ult_33_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -32859,20 +28679,20 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_35_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_33_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_35_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_33_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -32880,18 +28700,18 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_35_v2i64: +; BITALG_NOVLX-LABEL: ult_33_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_35_v2i64: +; BITALG-LABEL: ult_33_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -32901,13 +28721,13 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_35_v2i64: +define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_33_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -32926,7 +28746,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32937,7 +28757,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_35_v2i64: +; SSE3-LABEL: ugt_33_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -32956,7 +28776,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] ; SSE3-NEXT: movdqa 
%xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32967,7 +28787,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_35_v2i64: +; SSSE3-LABEL: ugt_33_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -32982,7 +28802,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -32993,7 +28813,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_35_v2i64: +; SSE41-LABEL: ugt_33_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -33008,7 +28828,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483681,2147483681] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33019,7 +28839,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_35_v2i64: +; AVX1-LABEL: ugt_33_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33034,7 +28854,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_35_v2i64: +; AVX2-LABEL: ugt_33_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33049,7 +28869,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_35_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_33_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -33057,7 +28877,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_35_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_33_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -33065,7 +28885,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_35_v2i64: +; BITALG_NOVLX-LABEL: ugt_33_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -33075,7 +28895,7 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_35_v2i64: +; BITALG-LABEL: ugt_33_v2i64: ; BITALG: # %bb.0: ; 
BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -33085,13 +28905,13 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_36_v2i64: +define <2 x i64> @ult_34_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_34_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -33110,7 +28930,7 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33121,7 +28941,7 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_36_v2i64: +; SSE3-LABEL: ult_34_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -33140,7 +28960,7 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33151,7 +28971,7 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_36_v2i64: +; SSSE3-LABEL: ult_34_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -33166,7 +28986,7 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33177,7 +28997,7 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_36_v2i64: +; SSE41-LABEL: ult_34_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -33192,7 +29012,7 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33203,7 +29023,7 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_36_v2i64: +; AVX1-LABEL: ult_34_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33215,11 +29035,11 @@ define <2 x i64> 
@ult_36_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_36_v2i64: +; AVX2-LABEL: ult_34_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33231,20 +29051,20 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_36_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_34_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_36_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_34_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -33252,18 +29072,18 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_36_v2i64: +; BITALG_NOVLX-LABEL: ult_34_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_36_v2i64: +; BITALG-LABEL: ult_34_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -33273,13 +29093,13 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_36_v2i64: +define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_34_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -33298,7 +29118,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33309,7 +29129,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_36_v2i64: +; SSE3-LABEL: ugt_34_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 
; SSE3-NEXT: psrlw $1, %xmm1 @@ -33328,7 +29148,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33339,7 +29159,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_36_v2i64: +; SSSE3-LABEL: ugt_34_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -33354,7 +29174,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33365,7 +29185,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_36_v2i64: +; SSE41-LABEL: ugt_34_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -33380,7 +29200,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483682,2147483682] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33391,7 +29211,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_36_v2i64: +; AVX1-LABEL: ugt_34_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33406,7 +29226,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_36_v2i64: +; AVX2-LABEL: ugt_34_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33421,7 +29241,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_36_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_34_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -33429,7 +29249,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_36_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_34_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -33437,7 +29257,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_36_v2i64: +; BITALG_NOVLX-LABEL: ugt_34_v2i64: ; 
BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -33447,7 +29267,7 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_36_v2i64: +; BITALG-LABEL: ugt_34_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -33457,13 +29277,13 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_37_v2i64: +define <2 x i64> @ult_35_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_35_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -33482,7 +29302,7 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33493,7 +29313,7 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_37_v2i64: +; SSE3-LABEL: ult_35_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -33512,7 +29332,7 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33523,7 +29343,7 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_37_v2i64: +; SSSE3-LABEL: ult_35_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -33538,7 +29358,7 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33549,7 +29369,7 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_37_v2i64: +; SSE41-LABEL: ult_35_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -33564,7 +29384,7 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33575,7 +29395,7 @@ define 
<2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_37_v2i64: +; AVX1-LABEL: ult_35_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33587,11 +29407,11 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_37_v2i64: +; AVX2-LABEL: ult_35_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33603,20 +29423,20 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_37_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_35_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_37_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_35_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -33624,18 +29444,18 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_37_v2i64: +; BITALG_NOVLX-LABEL: ult_35_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_37_v2i64: +; BITALG-LABEL: ult_35_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -33645,13 +29465,13 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_37_v2i64: +define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_35_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -33670,7 +29490,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] ; 
SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33681,7 +29501,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_37_v2i64: +; SSE3-LABEL: ugt_35_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -33700,7 +29520,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33711,7 +29531,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_37_v2i64: +; SSSE3-LABEL: ugt_35_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -33726,7 +29546,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33737,7 +29557,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_37_v2i64: +; SSE41-LABEL: ugt_35_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -33752,7 +29572,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483683,2147483683] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33763,7 +29583,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_37_v2i64: +; AVX1-LABEL: ugt_35_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33778,7 +29598,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_37_v2i64: +; AVX2-LABEL: ugt_35_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33793,7 +29613,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_37_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_35_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -33801,7 +29621,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_37_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_35_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; 
AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -33809,7 +29629,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_37_v2i64: +; BITALG_NOVLX-LABEL: ugt_35_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -33819,7 +29639,7 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_37_v2i64: +; BITALG-LABEL: ugt_35_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -33829,13 +29649,13 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_38_v2i64: +define <2 x i64> @ult_36_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_36_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -33854,7 +29674,7 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33865,7 +29685,7 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_38_v2i64: +; SSE3-LABEL: ult_36_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -33884,7 +29704,7 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33895,7 +29715,7 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_38_v2i64: +; SSSE3-LABEL: ult_36_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -33910,7 +29730,7 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33921,7 +29741,7 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_38_v2i64: +; SSE41-LABEL: ult_36_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -33936,7 +29756,7 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 
; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -33947,7 +29767,7 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_38_v2i64: +; AVX1-LABEL: ult_36_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33959,11 +29779,11 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_38_v2i64: +; AVX2-LABEL: ult_36_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -33975,20 +29795,20 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_38_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_36_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_38_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_36_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -33996,18 +29816,18 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_38_v2i64: +; BITALG_NOVLX-LABEL: ult_36_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_38_v2i64: +; BITALG-LABEL: ult_36_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -34017,13 +29837,13 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_38_v2i64: +define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_36_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa 
%xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -34042,7 +29862,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34053,7 +29873,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_38_v2i64: +; SSE3-LABEL: ugt_36_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -34072,7 +29892,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34083,7 +29903,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_38_v2i64: +; SSSE3-LABEL: ugt_36_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -34098,7 +29918,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34109,7 +29929,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_38_v2i64: +; SSE41-LABEL: ugt_36_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -34124,7 +29944,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483684,2147483684] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34135,7 +29955,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_38_v2i64: +; AVX1-LABEL: ugt_36_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34150,7 +29970,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_38_v2i64: +; AVX2-LABEL: ugt_36_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34165,7 +29985,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_38_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_36_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def 
$xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -34173,7 +29993,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_38_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_36_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -34181,7 +30001,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_38_v2i64: +; BITALG_NOVLX-LABEL: ugt_36_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -34191,7 +30011,7 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_38_v2i64: +; BITALG-LABEL: ugt_36_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -34201,13 +30021,13 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_39_v2i64: +define <2 x i64> @ult_37_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_37_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -34226,7 +30046,7 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34237,7 +30057,7 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_39_v2i64: +; SSE3-LABEL: ult_37_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -34256,7 +30076,7 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34267,7 +30087,7 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_39_v2i64: +; SSSE3-LABEL: ult_37_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -34282,7 +30102,7 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34293,7 +30113,7 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, 
%xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_39_v2i64: +; SSE41-LABEL: ult_37_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -34308,7 +30128,7 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34319,7 +30139,7 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_39_v2i64: +; AVX1-LABEL: ult_37_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34331,11 +30151,11 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_39_v2i64: +; AVX2-LABEL: ult_37_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34347,20 +30167,20 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_39_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_37_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_39_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_37_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -34368,18 +30188,18 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_39_v2i64: +; BITALG_NOVLX-LABEL: ult_37_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_39_v2i64: +; BITALG-LABEL: ult_37_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -34389,13 +30209,13 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> 
@llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_39_v2i64: +define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_37_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -34414,7 +30234,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34425,7 +30245,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_39_v2i64: +; SSE3-LABEL: ugt_37_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -34444,7 +30264,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34455,7 +30275,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_39_v2i64: +; SSSE3-LABEL: ugt_37_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -34470,7 +30290,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34481,7 +30301,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_39_v2i64: +; SSE41-LABEL: ugt_37_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -34496,7 +30316,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483685,2147483685] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34507,7 +30327,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_39_v2i64: +; AVX1-LABEL: ugt_37_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34522,7 +30342,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_39_v2i64: +; AVX2-LABEL: ugt_37_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34537,7 +30357,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_39_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_37_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -34545,7 +30365,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_39_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_37_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -34553,7 +30373,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_39_v2i64: +; BITALG_NOVLX-LABEL: ugt_37_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -34563,7 +30383,7 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_39_v2i64: +; BITALG-LABEL: ugt_37_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -34573,13 +30393,13 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_40_v2i64: +define <2 x i64> @ult_38_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_38_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -34598,7 +30418,7 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34609,7 +30429,7 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_40_v2i64: +; SSE3-LABEL: ult_38_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -34628,7 +30448,7 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34639,7 +30459,7 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_40_v2i64: +; SSSE3-LABEL: ult_38_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -34654,7 +30474,7 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; 
SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34665,7 +30485,7 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_40_v2i64: +; SSE41-LABEL: ult_38_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -34680,7 +30500,7 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34691,7 +30511,7 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_40_v2i64: +; AVX1-LABEL: ult_38_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34703,11 +30523,11 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_40_v2i64: +; AVX2-LABEL: ult_38_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34719,20 +30539,20 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_40_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_38_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_40_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_38_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -34740,18 +30560,18 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_40_v2i64: +; BITALG_NOVLX-LABEL: ult_38_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; 
BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_40_v2i64: +; BITALG-LABEL: ult_38_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -34761,13 +30581,13 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_40_v2i64: +define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_38_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -34786,7 +30606,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34797,7 +30617,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_40_v2i64: +; SSE3-LABEL: ugt_38_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -34816,7 +30636,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34827,7 +30647,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_40_v2i64: +; SSSE3-LABEL: ugt_38_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -34842,7 +30662,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34853,7 +30673,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_40_v2i64: +; SSE41-LABEL: ugt_38_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -34868,7 +30688,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483686,2147483686] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34879,7 +30699,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_40_v2i64: +; AVX1-LABEL: ugt_38_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 
= [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34894,7 +30714,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_40_v2i64: +; AVX2-LABEL: ugt_38_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -34909,7 +30729,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_40_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_38_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -34917,7 +30737,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_40_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_38_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -34925,7 +30745,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_40_v2i64: +; BITALG_NOVLX-LABEL: ugt_38_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -34935,7 +30755,7 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_40_v2i64: +; BITALG-LABEL: ugt_38_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -34945,13 +30765,13 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_41_v2i64: +define <2 x i64> @ult_39_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_39_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -34970,7 +30790,7 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -34981,7 +30801,7 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_41_v2i64: +; SSE3-LABEL: ult_39_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -35000,7 +30820,7 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35011,7 +30831,7 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; 
-; SSSE3-LABEL: ult_41_v2i64: +; SSSE3-LABEL: ult_39_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -35026,7 +30846,7 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35037,7 +30857,7 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_41_v2i64: +; SSE41-LABEL: ult_39_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -35052,7 +30872,7 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35063,7 +30883,7 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_41_v2i64: +; AVX1-LABEL: ult_39_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35075,11 +30895,11 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_41_v2i64: +; AVX2-LABEL: ult_39_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35091,20 +30911,20 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_41_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_39_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_41_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_39_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -35112,18 +30932,18 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_41_v2i64: +; BITALG_NOVLX-LABEL: ult_39_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def 
$xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_41_v2i64: +; BITALG-LABEL: ult_39_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -35133,13 +30953,13 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_41_v2i64: +define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_39_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -35158,7 +30978,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35169,7 +30989,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_41_v2i64: +; SSE3-LABEL: ugt_39_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -35188,7 +31008,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35199,7 +31019,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_41_v2i64: +; SSSE3-LABEL: ugt_39_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -35214,7 +31034,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35225,7 +31045,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_41_v2i64: +; SSE41-LABEL: ugt_39_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -35240,7 +31060,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483687,2147483687] ; SSE41-NEXT: movdqa 
%xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35251,7 +31071,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_41_v2i64: +; AVX1-LABEL: ugt_39_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35266,7 +31086,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_41_v2i64: +; AVX2-LABEL: ugt_39_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35281,7 +31101,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_41_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_39_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -35289,7 +31109,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_41_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_39_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -35297,7 +31117,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_41_v2i64: +; BITALG_NOVLX-LABEL: ugt_39_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -35307,7 +31127,7 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_41_v2i64: +; BITALG-LABEL: ugt_39_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -35317,13 +31137,13 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_42_v2i64: +define <2 x i64> @ult_40_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_40_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -35342,7 +31162,7 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35353,7 +31173,7 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_42_v2i64: +; SSE3-LABEL: ult_40_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -35372,7 +31192,7 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483690,2147483690] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35383,7 +31203,7 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_42_v2i64: +; SSSE3-LABEL: ult_40_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -35398,7 +31218,7 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35409,7 +31229,7 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_42_v2i64: +; SSE41-LABEL: ult_40_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -35424,7 +31244,7 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35435,7 +31255,7 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_42_v2i64: +; AVX1-LABEL: ult_40_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35447,11 +31267,11 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_42_v2i64: +; AVX2-LABEL: ult_40_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35463,20 +31283,20 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_42_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_40_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_42_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_40_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; 
AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -35484,18 +31304,18 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_42_v2i64: +; BITALG_NOVLX-LABEL: ult_40_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_42_v2i64: +; BITALG-LABEL: ult_40_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -35505,13 +31325,13 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_42_v2i64: +define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_40_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -35530,7 +31350,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35541,7 +31361,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_42_v2i64: +; SSE3-LABEL: ugt_40_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -35560,7 +31380,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35571,7 +31391,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_42_v2i64: +; SSSE3-LABEL: ugt_40_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -35586,7 +31406,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35597,7 +31417,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_42_v2i64: +; SSE41-LABEL: ugt_40_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -35612,7 +31432,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483688,2147483688] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35623,7 +31443,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_42_v2i64: +; AVX1-LABEL: ugt_40_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35638,7 +31458,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_42_v2i64: +; AVX2-LABEL: ugt_40_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35653,7 +31473,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_42_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_40_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -35661,7 +31481,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_42_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_40_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -35669,7 +31489,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_42_v2i64: +; BITALG_NOVLX-LABEL: ugt_40_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -35679,7 +31499,7 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_42_v2i64: +; BITALG-LABEL: ugt_40_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -35689,13 +31509,13 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_43_v2i64: +define <2 x i64> @ult_41_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_41_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -35714,7 +31534,7 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35725,7 +31545,7 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; 
SSE2-NEXT: retq ; -; SSE3-LABEL: ult_43_v2i64: +; SSE3-LABEL: ult_41_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -35744,7 +31564,7 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35755,7 +31575,7 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_43_v2i64: +; SSSE3-LABEL: ult_41_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -35770,7 +31590,7 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35781,7 +31601,7 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_43_v2i64: +; SSE41-LABEL: ult_41_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -35796,7 +31616,7 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35807,7 +31627,7 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_43_v2i64: +; AVX1-LABEL: ult_41_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35819,11 +31639,11 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_43_v2i64: +; AVX2-LABEL: ult_41_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -35835,20 +31655,20 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_43_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_41_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; 
AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_43_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_41_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -35856,18 +31676,18 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_43_v2i64: +; BITALG_NOVLX-LABEL: ult_41_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_43_v2i64: +; BITALG-LABEL: ult_41_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -35877,13 +31697,13 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_43_v2i64: +define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_41_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -35902,7 +31722,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35913,7 +31733,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_43_v2i64: +; SSE3-LABEL: ugt_41_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -35932,7 +31752,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35943,7 +31763,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_43_v2i64: +; SSSE3-LABEL: ugt_41_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -35958,7 +31778,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; 
SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35969,7 +31789,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_43_v2i64: +; SSE41-LABEL: ugt_41_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -35984,7 +31804,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483689,2147483689] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -35995,7 +31815,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_43_v2i64: +; AVX1-LABEL: ugt_41_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36010,7 +31830,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_43_v2i64: +; AVX2-LABEL: ugt_41_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36025,7 +31845,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_43_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_41_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -36033,7 +31853,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_43_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_41_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -36041,7 +31861,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_43_v2i64: +; BITALG_NOVLX-LABEL: ugt_41_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -36051,7 +31871,7 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_43_v2i64: +; BITALG-LABEL: ugt_41_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -36061,13 +31881,13 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_44_v2i64: +define <2 x i64> @ult_42_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_42_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -36086,7 +31906,7 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; 
SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36097,7 +31917,7 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_44_v2i64: +; SSE3-LABEL: ult_42_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -36116,7 +31936,7 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36127,7 +31947,7 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_44_v2i64: +; SSSE3-LABEL: ult_42_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -36142,7 +31962,7 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36153,7 +31973,7 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_44_v2i64: +; SSE41-LABEL: ult_42_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -36168,7 +31988,7 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36179,7 +31999,7 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_44_v2i64: +; AVX1-LABEL: ult_42_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36191,11 +32011,11 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_44_v2i64: +; AVX2-LABEL: ult_42_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36207,20 +32027,20 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] ; AVX2-NEXT: 
vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_44_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_42_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_44_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_42_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -36228,18 +32048,18 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_44_v2i64: +; BITALG_NOVLX-LABEL: ult_42_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_44_v2i64: +; BITALG-LABEL: ult_42_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -36249,13 +32069,13 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_44_v2i64: +define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_42_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -36274,7 +32094,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36285,7 +32105,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_44_v2i64: +; SSE3-LABEL: ugt_42_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -36304,7 +32124,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36315,7 +32135,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_44_v2i64: +; SSSE3-LABEL: ugt_42_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -36330,7 +32150,7 @@ define <2 x i64> 
@ugt_44_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36341,7 +32161,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_44_v2i64: +; SSE41-LABEL: ugt_42_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -36356,7 +32176,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483690,2147483690] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36367,7 +32187,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_44_v2i64: +; AVX1-LABEL: ugt_42_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36382,7 +32202,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_44_v2i64: +; AVX2-LABEL: ugt_42_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36397,7 +32217,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_44_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_42_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -36405,7 +32225,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_44_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_42_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -36413,7 +32233,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_44_v2i64: +; BITALG_NOVLX-LABEL: ugt_42_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -36423,7 +32243,7 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_44_v2i64: +; BITALG-LABEL: ugt_42_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -36433,13 +32253,13 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_45_v2i64: 
+define <2 x i64> @ult_43_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_43_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -36458,7 +32278,7 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36469,7 +32289,7 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_45_v2i64: +; SSE3-LABEL: ult_43_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -36488,7 +32308,7 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36499,7 +32319,7 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_45_v2i64: +; SSSE3-LABEL: ult_43_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -36514,7 +32334,7 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36525,7 +32345,7 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_45_v2i64: +; SSE41-LABEL: ult_43_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -36540,7 +32360,7 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36551,7 +32371,7 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_45_v2i64: +; AVX1-LABEL: ult_43_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36563,11 +32383,11 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_45_v2i64: +; AVX2-LABEL: ult_43_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36579,20 +32399,20 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_45_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_43_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_45_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_43_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -36600,18 +32420,18 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_45_v2i64: +; BITALG_NOVLX-LABEL: ult_43_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_45_v2i64: +; BITALG-LABEL: ult_43_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -36621,13 +32441,13 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_45_v2i64: +define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_43_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -36646,7 +32466,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36657,7 +32477,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_45_v2i64: +; SSE3-LABEL: ugt_43_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -36676,7 +32496,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36687,7 +32507,7 @@ 
define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_45_v2i64: +; SSSE3-LABEL: ugt_43_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -36702,7 +32522,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36713,7 +32533,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_45_v2i64: +; SSE41-LABEL: ugt_43_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -36728,7 +32548,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483691,2147483691] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36739,7 +32559,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_45_v2i64: +; AVX1-LABEL: ugt_43_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36754,7 +32574,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_45_v2i64: +; AVX2-LABEL: ugt_43_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36769,7 +32589,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_45_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_43_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -36777,7 +32597,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_45_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_43_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -36785,7 +32605,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_45_v2i64: +; BITALG_NOVLX-LABEL: ugt_43_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -36795,7 +32615,7 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_45_v2i64: +; BITALG-LABEL: ugt_43_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -36805,13 +32625,13 @@ define <2 x i64> 
@ugt_45_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_46_v2i64: +define <2 x i64> @ult_44_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_44_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -36830,7 +32650,7 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36841,7 +32661,7 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_46_v2i64: +; SSE3-LABEL: ult_44_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -36860,7 +32680,7 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36871,7 +32691,7 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_46_v2i64: +; SSSE3-LABEL: ult_44_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -36886,7 +32706,7 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36897,7 +32717,7 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_46_v2i64: +; SSE41-LABEL: ult_44_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -36912,7 +32732,7 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -36923,7 +32743,7 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_46_v2i64: +; AVX1-LABEL: ult_44_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36935,11 +32755,11 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: 
vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_46_v2i64: +; AVX2-LABEL: ult_44_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -36951,20 +32771,20 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_46_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_44_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_46_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_44_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -36972,18 +32792,18 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_46_v2i64: +; BITALG_NOVLX-LABEL: ult_44_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_46_v2i64: +; BITALG-LABEL: ult_44_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -36993,13 +32813,13 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_46_v2i64: +define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_44_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -37018,7 +32838,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37029,7 +32849,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_46_v2i64: +; SSE3-LABEL: ugt_44_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -37048,7 +32868,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor 
%xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37059,7 +32879,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_46_v2i64: +; SSSE3-LABEL: ugt_44_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -37074,7 +32894,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37085,7 +32905,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_46_v2i64: +; SSE41-LABEL: ugt_44_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -37100,7 +32920,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483692,2147483692] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37111,7 +32931,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_46_v2i64: +; AVX1-LABEL: ugt_44_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37126,7 +32946,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_46_v2i64: +; AVX2-LABEL: ugt_44_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37141,7 +32961,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_46_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_44_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -37149,7 +32969,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_46_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_44_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -37157,7 +32977,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_46_v2i64: +; BITALG_NOVLX-LABEL: ugt_44_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 
@@ -37167,7 +32987,7 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_46_v2i64: +; BITALG-LABEL: ugt_44_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -37177,13 +32997,13 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_47_v2i64: +define <2 x i64> @ult_45_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_45_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -37202,7 +33022,7 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37213,7 +33033,7 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_47_v2i64: +; SSE3-LABEL: ult_45_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -37232,7 +33052,7 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37243,7 +33063,7 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_47_v2i64: +; SSSE3-LABEL: ult_45_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -37258,7 +33078,7 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37269,7 +33089,7 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_47_v2i64: +; SSE41-LABEL: ult_45_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -37284,7 +33104,7 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37295,7 +33115,7 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_47_v2i64: +; 
AVX1-LABEL: ult_45_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37307,11 +33127,11 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_47_v2i64: +; AVX2-LABEL: ult_45_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37323,20 +33143,20 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_47_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_45_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_47_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_45_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -37344,18 +33164,18 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_47_v2i64: +; BITALG_NOVLX-LABEL: ult_45_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_47_v2i64: +; BITALG-LABEL: ult_45_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -37365,13 +33185,13 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_47_v2i64: +define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_45_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -37390,7 +33210,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37401,7 
+33221,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_47_v2i64: +; SSE3-LABEL: ugt_45_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -37420,7 +33240,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37431,7 +33251,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_47_v2i64: +; SSSE3-LABEL: ugt_45_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -37446,7 +33266,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37457,7 +33277,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_47_v2i64: +; SSE41-LABEL: ugt_45_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -37472,7 +33292,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483693,2147483693] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37483,7 +33303,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_47_v2i64: +; AVX1-LABEL: ugt_45_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37498,7 +33318,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_47_v2i64: +; AVX2-LABEL: ugt_45_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37513,7 +33333,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_47_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_45_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -37521,7 +33341,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_47_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_45_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -37529,7 +33349,7 @@ define 
<2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_47_v2i64: +; BITALG_NOVLX-LABEL: ugt_45_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -37539,7 +33359,7 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_47_v2i64: +; BITALG-LABEL: ugt_45_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -37549,13 +33369,13 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_48_v2i64: +define <2 x i64> @ult_46_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_46_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -37574,7 +33394,7 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37585,7 +33405,7 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_48_v2i64: +; SSE3-LABEL: ult_46_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -37604,7 +33424,7 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37615,7 +33435,7 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_48_v2i64: +; SSSE3-LABEL: ult_46_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -37630,7 +33450,7 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37641,7 +33461,7 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_48_v2i64: +; SSE41-LABEL: ult_46_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -37656,7 +33476,7 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] +; 
SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37667,7 +33487,7 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_48_v2i64: +; AVX1-LABEL: ult_46_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37679,11 +33499,11 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_48_v2i64: +; AVX2-LABEL: ult_46_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37695,20 +33515,20 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_48_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_46_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_48_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_46_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -37716,18 +33536,18 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_48_v2i64: +; BITALG_NOVLX-LABEL: ult_46_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_48_v2i64: +; BITALG-LABEL: ult_46_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -37737,13 +33557,13 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_48_v2i64: +define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_46_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -37762,7 +33582,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor 
%xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37773,7 +33593,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_48_v2i64: +; SSE3-LABEL: ugt_46_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -37792,7 +33612,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37803,7 +33623,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_48_v2i64: +; SSSE3-LABEL: ugt_46_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -37818,7 +33638,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37829,7 +33649,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_48_v2i64: +; SSE41-LABEL: ugt_46_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -37844,7 +33664,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483694,2147483694] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37855,7 +33675,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_48_v2i64: +; AVX1-LABEL: ugt_46_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37870,7 +33690,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_48_v2i64: +; AVX2-LABEL: ugt_46_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -37885,7 +33705,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_48_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_46_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -37893,7 +33713,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x 
i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_48_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_46_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -37901,7 +33721,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_48_v2i64: +; BITALG_NOVLX-LABEL: ugt_46_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -37911,7 +33731,7 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_48_v2i64: +; BITALG-LABEL: ugt_46_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -37921,13 +33741,13 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_49_v2i64: +define <2 x i64> @ult_47_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_47_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -37946,7 +33766,7 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37957,7 +33777,7 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_49_v2i64: +; SSE3-LABEL: ult_47_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -37976,7 +33796,7 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -37987,7 +33807,7 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_49_v2i64: +; SSSE3-LABEL: ult_47_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -38002,7 +33822,7 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38013,7 +33833,7 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_49_v2i64: +; SSE41-LABEL: ult_47_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} 
xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -38028,7 +33848,7 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38039,7 +33859,7 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_49_v2i64: +; AVX1-LABEL: ult_47_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38051,11 +33871,11 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_49_v2i64: +; AVX2-LABEL: ult_47_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38067,20 +33887,20 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_49_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_47_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_49_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_47_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -38088,18 +33908,18 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_49_v2i64: +; BITALG_NOVLX-LABEL: ult_47_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_49_v2i64: +; BITALG-LABEL: ult_47_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -38109,13 +33929,13 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> 
%4 } -define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_49_v2i64: +define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_47_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -38134,7 +33954,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38145,7 +33965,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_49_v2i64: +; SSE3-LABEL: ugt_47_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -38164,7 +33984,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38175,7 +33995,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_49_v2i64: +; SSSE3-LABEL: ugt_47_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -38190,7 +34010,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38201,7 +34021,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_49_v2i64: +; SSE41-LABEL: ugt_47_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -38216,7 +34036,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483695,2147483695] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38227,7 +34047,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_49_v2i64: +; AVX1-LABEL: ugt_47_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38242,7 +34062,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_49_v2i64: +; AVX2-LABEL: ugt_47_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38257,7 +34077,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; AVX2-NEXT: 
vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_49_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_47_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -38265,7 +34085,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_49_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_47_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -38273,7 +34093,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_49_v2i64: +; BITALG_NOVLX-LABEL: ugt_47_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -38283,7 +34103,7 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_49_v2i64: +; BITALG-LABEL: ugt_47_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -38293,13 +34113,13 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_50_v2i64: +define <2 x i64> @ult_48_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_48_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -38318,7 +34138,7 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38329,7 +34149,7 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_50_v2i64: +; SSE3-LABEL: ult_48_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -38348,7 +34168,7 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38359,7 +34179,7 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_50_v2i64: +; SSSE3-LABEL: ult_48_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -38374,7 +34194,7 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] ; SSSE3-NEXT: 
movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38385,7 +34205,7 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_50_v2i64: +; SSE41-LABEL: ult_48_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -38400,7 +34220,7 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38411,7 +34231,7 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_50_v2i64: +; AVX1-LABEL: ult_48_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38423,11 +34243,11 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_50_v2i64: +; AVX2-LABEL: ult_48_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38439,20 +34259,20 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_50_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_48_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_50_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_48_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -38460,18 +34280,18 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_50_v2i64: +; BITALG_NOVLX-LABEL: ult_48_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_50_v2i64: +; BITALG-LABEL: ult_48_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; 
BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -38481,13 +34301,13 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_50_v2i64: +define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_48_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -38506,7 +34326,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38517,7 +34337,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_50_v2i64: +; SSE3-LABEL: ugt_48_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -38536,7 +34356,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38547,7 +34367,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_50_v2i64: +; SSSE3-LABEL: ugt_48_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -38562,7 +34382,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38573,7 +34393,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_50_v2i64: +; SSE41-LABEL: ugt_48_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -38588,7 +34408,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483696,2147483696] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38599,7 +34419,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_50_v2i64: +; AVX1-LABEL: ugt_48_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38614,7 +34434,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; AVX1-NEXT: 
vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_50_v2i64: +; AVX2-LABEL: ugt_48_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38629,7 +34449,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_50_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_48_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -38637,7 +34457,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_50_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_48_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -38645,7 +34465,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_50_v2i64: +; BITALG_NOVLX-LABEL: ugt_48_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -38655,7 +34475,7 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_50_v2i64: +; BITALG-LABEL: ugt_48_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -38665,13 +34485,13 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_51_v2i64: +define <2 x i64> @ult_49_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_49_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -38690,7 +34510,7 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38701,7 +34521,7 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_51_v2i64: +; SSE3-LABEL: ult_49_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -38720,7 +34540,7 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38731,7 +34551,7 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_51_v2i64: +; SSSE3-LABEL: ult_49_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -38746,7 +34566,7 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38757,7 +34577,7 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_51_v2i64: +; SSE41-LABEL: ult_49_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -38772,7 +34592,7 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38783,7 +34603,7 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_51_v2i64: +; AVX1-LABEL: ult_49_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38795,11 +34615,11 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_51_v2i64: +; AVX2-LABEL: ult_49_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38811,20 +34631,20 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_51_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_49_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_51_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_49_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -38832,18 +34652,18 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_51_v2i64: +; BITALG_NOVLX-LABEL: ult_49_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; 
BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_51_v2i64: +; BITALG-LABEL: ult_49_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -38853,13 +34673,13 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_51_v2i64: +define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_49_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -38878,7 +34698,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38889,7 +34709,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_51_v2i64: +; SSE3-LABEL: ugt_49_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -38908,7 +34728,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38919,7 +34739,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_51_v2i64: +; SSSE3-LABEL: ugt_49_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -38934,7 +34754,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38945,7 +34765,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_51_v2i64: +; SSE41-LABEL: ugt_49_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -38960,7 +34780,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483697,2147483697] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -38971,7 +34791,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; 
SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_51_v2i64: +; AVX1-LABEL: ugt_49_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -38986,7 +34806,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_51_v2i64: +; AVX2-LABEL: ugt_49_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39001,7 +34821,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_51_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_49_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -39009,7 +34829,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_51_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_49_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -39017,7 +34837,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_51_v2i64: +; BITALG_NOVLX-LABEL: ugt_49_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -39027,7 +34847,7 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_51_v2i64: +; BITALG-LABEL: ugt_49_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -39037,13 +34857,13 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_52_v2i64: +define <2 x i64> @ult_50_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_50_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -39062,7 +34882,7 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39073,7 +34893,7 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_52_v2i64: +; SSE3-LABEL: ult_50_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -39092,7 +34912,7 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; 
SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39103,7 +34923,7 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_52_v2i64: +; SSSE3-LABEL: ult_50_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -39118,7 +34938,7 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39129,7 +34949,7 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_52_v2i64: +; SSE41-LABEL: ult_50_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -39144,7 +34964,7 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39155,7 +34975,7 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_52_v2i64: +; AVX1-LABEL: ult_50_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39167,11 +34987,11 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_52_v2i64: +; AVX2-LABEL: ult_50_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39183,20 +35003,20 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_52_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_50_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_52_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_50_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -39204,18 +35024,18 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 
; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_52_v2i64: +; BITALG_NOVLX-LABEL: ult_50_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_52_v2i64: +; BITALG-LABEL: ult_50_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -39225,13 +35045,13 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_52_v2i64: +define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_50_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -39250,7 +35070,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39261,7 +35081,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_52_v2i64: +; SSE3-LABEL: ugt_50_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -39280,7 +35100,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39291,7 +35111,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_52_v2i64: +; SSSE3-LABEL: ugt_50_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -39306,7 +35126,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39317,7 +35137,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_52_v2i64: +; SSE41-LABEL: ugt_50_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -39332,7 +35152,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por 
{{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483698,2147483698] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39343,7 +35163,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_52_v2i64: +; AVX1-LABEL: ugt_50_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39358,7 +35178,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_52_v2i64: +; AVX2-LABEL: ugt_50_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39373,7 +35193,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_52_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_50_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -39381,7 +35201,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_52_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_50_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -39389,7 +35209,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_52_v2i64: +; BITALG_NOVLX-LABEL: ugt_50_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -39399,7 +35219,7 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_52_v2i64: +; BITALG-LABEL: ugt_50_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -39409,13 +35229,13 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_53_v2i64: +define <2 x i64> @ult_51_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_51_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -39434,7 +35254,7 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39445,7 +35265,7 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_53_v2i64: +; SSE3-LABEL: ult_51_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -39464,7 +35284,7 @@ define <2 
x i64> @ult_53_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39475,7 +35295,7 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_53_v2i64: +; SSSE3-LABEL: ult_51_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -39490,7 +35310,7 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39501,7 +35321,7 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_53_v2i64: +; SSE41-LABEL: ult_51_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -39516,7 +35336,7 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39527,7 +35347,7 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_53_v2i64: +; AVX1-LABEL: ult_51_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39539,11 +35359,11 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_53_v2i64: +; AVX2-LABEL: ult_51_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39555,20 +35375,20 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_53_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_51_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; 
AVX512VPOPCNTDQVL-LABEL: ult_53_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_51_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -39576,18 +35396,18 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_53_v2i64: +; BITALG_NOVLX-LABEL: ult_51_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_53_v2i64: +; BITALG-LABEL: ult_51_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -39597,13 +35417,13 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_53_v2i64: +define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_51_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -39622,7 +35442,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39633,7 +35453,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_53_v2i64: +; SSE3-LABEL: ugt_51_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -39652,7 +35472,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39663,7 +35483,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_53_v2i64: +; SSSE3-LABEL: ugt_51_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -39678,7 +35498,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39689,7 +35509,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; 
SSE41-LABEL: ugt_53_v2i64: +; SSE41-LABEL: ugt_51_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -39704,7 +35524,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483699,2147483699] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39715,7 +35535,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_53_v2i64: +; AVX1-LABEL: ugt_51_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39730,7 +35550,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_53_v2i64: +; AVX2-LABEL: ugt_51_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39745,7 +35565,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_53_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_51_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -39753,7 +35573,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_53_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_51_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -39761,7 +35581,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_53_v2i64: +; BITALG_NOVLX-LABEL: ugt_51_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -39771,7 +35591,7 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_53_v2i64: +; BITALG-LABEL: ugt_51_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -39781,13 +35601,13 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_54_v2i64: +define <2 x i64> @ult_52_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_52_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -39806,7 +35626,7 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd 
%xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39817,7 +35637,7 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_54_v2i64: +; SSE3-LABEL: ult_52_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -39836,7 +35656,7 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39847,7 +35667,7 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_54_v2i64: +; SSSE3-LABEL: ult_52_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -39862,7 +35682,7 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39873,7 +35693,7 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_54_v2i64: +; SSE41-LABEL: ult_52_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -39888,7 +35708,7 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -39899,7 +35719,7 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_54_v2i64: +; AVX1-LABEL: ult_52_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39911,11 +35731,11 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_54_v2i64: +; AVX2-LABEL: ult_52_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -39927,20 +35747,20 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_54_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_52_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; 
AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_54_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_52_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -39948,18 +35768,18 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_54_v2i64: +; BITALG_NOVLX-LABEL: ult_52_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_54_v2i64: +; BITALG-LABEL: ult_52_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -39969,13 +35789,13 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_54_v2i64: +define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_52_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -39994,7 +35814,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40005,7 +35825,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_54_v2i64: +; SSE3-LABEL: ugt_52_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -40024,7 +35844,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40035,7 +35855,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_54_v2i64: +; SSSE3-LABEL: ugt_52_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -40050,7 +35870,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483702,2147483702] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40061,7 +35881,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_54_v2i64: +; SSE41-LABEL: ugt_52_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -40076,7 +35896,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483700,2147483700] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40087,7 +35907,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_54_v2i64: +; AVX1-LABEL: ugt_52_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40102,7 +35922,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_54_v2i64: +; AVX2-LABEL: ugt_52_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40117,7 +35937,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_54_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_52_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -40125,7 +35945,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_54_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_52_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -40133,7 +35953,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_54_v2i64: +; BITALG_NOVLX-LABEL: ugt_52_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -40143,7 +35963,7 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_54_v2i64: +; BITALG-LABEL: ugt_52_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -40153,13 +35973,13 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_55_v2i64: +define <2 x i64> @ult_53_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_53_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ 
-40178,7 +35998,7 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40189,7 +36009,7 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_55_v2i64: +; SSE3-LABEL: ult_53_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -40208,7 +36028,7 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40219,7 +36039,7 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_55_v2i64: +; SSSE3-LABEL: ult_53_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -40234,7 +36054,7 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40245,7 +36065,7 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_55_v2i64: +; SSE41-LABEL: ult_53_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -40260,7 +36080,7 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40271,7 +36091,7 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_55_v2i64: +; AVX1-LABEL: ult_53_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40283,11 +36103,11 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_55_v2i64: +; AVX2-LABEL: ult_53_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40299,20 +36119,20 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: 
vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_55_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_53_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_55_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_53_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -40320,18 +36140,18 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_55_v2i64: +; BITALG_NOVLX-LABEL: ult_53_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_55_v2i64: +; BITALG-LABEL: ult_53_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -40341,13 +36161,13 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_55_v2i64: +define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_53_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -40366,7 +36186,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40377,7 +36197,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_55_v2i64: +; SSE3-LABEL: ugt_53_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -40396,7 +36216,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40407,7 +36227,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_55_v2i64: +; SSSE3-LABEL: ugt_53_v2i64: ; SSSE3: # 
%bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -40422,7 +36242,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40433,7 +36253,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_55_v2i64: +; SSE41-LABEL: ugt_53_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -40448,7 +36268,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483701,2147483701] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40459,7 +36279,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_55_v2i64: +; AVX1-LABEL: ugt_53_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40474,7 +36294,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_55_v2i64: +; AVX2-LABEL: ugt_53_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40489,7 +36309,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_55_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_53_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -40497,7 +36317,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_55_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_53_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -40505,7 +36325,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_55_v2i64: +; BITALG_NOVLX-LABEL: ugt_53_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -40515,7 +36335,7 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_55_v2i64: +; BITALG-LABEL: ugt_53_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -40525,13 +36345,13 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> 
%2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_56_v2i64: +define <2 x i64> @ult_54_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_54_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -40550,7 +36370,7 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40561,7 +36381,7 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_56_v2i64: +; SSE3-LABEL: ult_54_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -40580,7 +36400,7 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40591,7 +36411,7 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_56_v2i64: +; SSSE3-LABEL: ult_54_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -40606,7 +36426,7 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40617,7 +36437,7 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_56_v2i64: +; SSE41-LABEL: ult_54_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -40632,7 +36452,7 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40643,7 +36463,7 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_56_v2i64: +; AVX1-LABEL: ult_54_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40655,11 +36475,11 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; 
AVX2-LABEL: ult_56_v2i64: +; AVX2-LABEL: ult_54_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40671,20 +36491,20 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_56_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_54_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_56_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_54_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -40692,18 +36512,18 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_56_v2i64: +; BITALG_NOVLX-LABEL: ult_54_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_56_v2i64: +; BITALG-LABEL: ult_54_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -40713,13 +36533,13 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_56_v2i64: +define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_54_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -40738,7 +36558,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40749,7 +36569,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_56_v2i64: +; SSE3-LABEL: ugt_54_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -40768,7 +36588,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = 
[2147483702,2147483702] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40779,7 +36599,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_56_v2i64: +; SSSE3-LABEL: ugt_54_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -40794,7 +36614,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40805,7 +36625,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_56_v2i64: +; SSE41-LABEL: ugt_54_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -40820,7 +36640,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483702,2147483702] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40831,7 +36651,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_56_v2i64: +; AVX1-LABEL: ugt_54_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40846,7 +36666,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_56_v2i64: +; AVX2-LABEL: ugt_54_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -40861,7 +36681,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_56_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_54_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -40869,7 +36689,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_56_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_54_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -40877,7 +36697,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_56_v2i64: +; BITALG_NOVLX-LABEL: ugt_54_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -40887,7 +36707,7 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_56_v2i64: +; BITALG-LABEL: 
ugt_54_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -40897,13 +36717,13 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_57_v2i64: +define <2 x i64> @ult_55_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_55_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -40922,7 +36742,7 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40933,7 +36753,7 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_57_v2i64: +; SSE3-LABEL: ult_55_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -40952,7 +36772,7 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40963,7 +36783,7 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_57_v2i64: +; SSSE3-LABEL: ult_55_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -40978,7 +36798,7 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -40989,7 +36809,7 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_57_v2i64: +; SSE41-LABEL: ult_55_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -41004,7 +36824,7 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41015,7 +36835,7 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_57_v2i64: +; AVX1-LABEL: ult_55_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41027,11 
+36847,11 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_57_v2i64: +; AVX2-LABEL: ult_55_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41043,20 +36863,20 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_57_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_55_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_57_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_55_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -41064,18 +36884,18 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_57_v2i64: +; BITALG_NOVLX-LABEL: ult_55_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_57_v2i64: +; BITALG-LABEL: ult_55_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -41085,13 +36905,13 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_57_v2i64: +define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_55_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -41110,7 +36930,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41121,7 +36941,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_57_v2i64: +; SSE3-LABEL: ugt_55_v2i64: ; SSE3: # %bb.0: ; 
SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -41140,7 +36960,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41151,7 +36971,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_57_v2i64: +; SSSE3-LABEL: ugt_55_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -41166,7 +36986,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41177,7 +36997,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_57_v2i64: +; SSE41-LABEL: ugt_55_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -41192,7 +37012,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483703,2147483703] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41203,7 +37023,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_57_v2i64: +; AVX1-LABEL: ugt_55_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41218,7 +37038,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_57_v2i64: +; AVX2-LABEL: ugt_55_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41233,7 +37053,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_57_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_55_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -41241,7 +37061,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_57_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_55_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -41249,7 +37069,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_57_v2i64: +; 
BITALG_NOVLX-LABEL: ugt_55_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -41259,7 +37079,7 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_57_v2i64: +; BITALG-LABEL: ugt_55_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -41269,13 +37089,13 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_58_v2i64: +define <2 x i64> @ult_56_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_56_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -41294,7 +37114,7 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41305,7 +37125,7 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_58_v2i64: +; SSE3-LABEL: ult_56_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -41324,7 +37144,7 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41335,7 +37155,7 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_58_v2i64: +; SSSE3-LABEL: ult_56_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -41350,7 +37170,7 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41361,7 +37181,7 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_58_v2i64: +; SSE41-LABEL: ult_56_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -41376,7 +37196,7 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = 
xmm2[0,0,2,2] @@ -41387,7 +37207,7 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_58_v2i64: +; AVX1-LABEL: ult_56_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41399,11 +37219,11 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_58_v2i64: +; AVX2-LABEL: ult_56_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41415,20 +37235,20 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_58_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_56_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_58_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_56_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -41436,18 +37256,18 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_58_v2i64: +; BITALG_NOVLX-LABEL: ult_56_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_58_v2i64: +; BITALG-LABEL: ult_56_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -41457,13 +37277,13 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_58_v2i64: +define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_56_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -41482,7 +37302,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] +; SSE2-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483704,2147483704] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41493,7 +37313,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_58_v2i64: +; SSE3-LABEL: ugt_56_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -41512,7 +37332,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41523,7 +37343,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_58_v2i64: +; SSSE3-LABEL: ugt_56_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -41538,7 +37358,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41549,7 +37369,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_58_v2i64: +; SSE41-LABEL: ugt_56_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -41564,7 +37384,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483704,2147483704] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41575,7 +37395,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_58_v2i64: +; AVX1-LABEL: ugt_56_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41590,7 +37410,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_58_v2i64: +; AVX2-LABEL: ugt_56_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41605,7 +37425,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_58_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_56_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -41613,7 +37433,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_58_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_56_v2i64: ; 
AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -41621,7 +37441,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_58_v2i64: +; BITALG_NOVLX-LABEL: ugt_56_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -41631,7 +37451,7 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_58_v2i64: +; BITALG-LABEL: ugt_56_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -41641,13 +37461,13 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_59_v2i64: +define <2 x i64> @ult_57_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_57_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -41666,7 +37486,7 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41677,7 +37497,7 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_59_v2i64: +; SSE3-LABEL: ult_57_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -41696,7 +37516,7 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41707,7 +37527,7 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_59_v2i64: +; SSSE3-LABEL: ult_57_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -41722,7 +37542,7 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41733,7 +37553,7 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_59_v2i64: +; SSE41-LABEL: ult_57_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -41748,7 +37568,7 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; 
SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41759,7 +37579,7 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_59_v2i64: +; AVX1-LABEL: ult_57_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41771,11 +37591,11 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_59_v2i64: +; AVX2-LABEL: ult_57_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41787,20 +37607,20 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_59_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_57_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_59_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_57_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -41808,18 +37628,18 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_59_v2i64: +; BITALG_NOVLX-LABEL: ult_57_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_59_v2i64: +; BITALG-LABEL: ult_57_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -41829,13 +37649,13 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_59_v2i64: +define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_57_v2i64: ; 
SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -41854,7 +37674,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41865,7 +37685,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_59_v2i64: +; SSE3-LABEL: ugt_57_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -41884,7 +37704,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41895,7 +37715,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_59_v2i64: +; SSSE3-LABEL: ugt_57_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -41910,7 +37730,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41921,7 +37741,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_59_v2i64: +; SSE41-LABEL: ugt_57_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -41936,7 +37756,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483705,2147483705] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -41947,7 +37767,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_59_v2i64: +; AVX1-LABEL: ugt_57_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41962,7 +37782,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_59_v2i64: +; AVX2-LABEL: ugt_57_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -41977,7 +37797,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_59_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_57_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; 
AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -41985,7 +37805,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_59_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_57_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -41993,7 +37813,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_59_v2i64: +; BITALG_NOVLX-LABEL: ugt_57_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -42003,7 +37823,7 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_59_v2i64: +; BITALG-LABEL: ugt_57_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -42013,13 +37833,13 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_60_v2i64: +define <2 x i64> @ult_58_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_58_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -42038,7 +37858,7 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42049,7 +37869,7 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_60_v2i64: +; SSE3-LABEL: ult_58_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -42068,7 +37888,7 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42079,7 +37899,7 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_60_v2i64: +; SSSE3-LABEL: ult_58_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -42094,7 +37914,7 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42105,7 +37925,7 @@ define <2 x i64> @ult_60_v2i64(<2 x 
i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_60_v2i64: +; SSE41-LABEL: ult_58_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -42120,7 +37940,7 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42131,7 +37951,7 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_60_v2i64: +; AVX1-LABEL: ult_58_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42143,11 +37963,11 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_60_v2i64: +; AVX2-LABEL: ult_58_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42159,20 +37979,20 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_60_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_58_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_60_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_58_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -42180,18 +38000,18 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_60_v2i64: +; BITALG_NOVLX-LABEL: ult_58_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_60_v2i64: +; BITALG-LABEL: ult_58_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -42201,13 +38021,13 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: 
retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_60_v2i64: +define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_58_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -42226,7 +38046,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42237,7 +38057,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_60_v2i64: +; SSE3-LABEL: ugt_58_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -42256,7 +38076,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42267,7 +38087,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_60_v2i64: +; SSSE3-LABEL: ugt_58_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -42282,7 +38102,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42293,7 +38113,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_60_v2i64: +; SSE41-LABEL: ugt_58_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -42308,7 +38128,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483706,2147483706] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42319,7 +38139,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_60_v2i64: +; AVX1-LABEL: ugt_58_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42334,7 +38154,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_60_v2i64: +; AVX2-LABEL: ugt_58_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42349,7 +38169,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_60_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_58_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -42357,7 +38177,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_60_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_58_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -42365,7 +38185,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_60_v2i64: +; BITALG_NOVLX-LABEL: ugt_58_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -42375,7 +38195,7 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_60_v2i64: +; BITALG-LABEL: ugt_58_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -42385,13 +38205,13 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_61_v2i64: +define <2 x i64> @ult_59_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_59_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -42410,7 +38230,7 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42421,7 +38241,7 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_61_v2i64: +; SSE3-LABEL: ult_59_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -42440,7 +38260,7 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42451,7 +38271,7 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_61_v2i64: +; SSSE3-LABEL: ult_59_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -42466,7 +38286,7 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; 
SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42477,7 +38297,7 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_61_v2i64: +; SSE41-LABEL: ult_59_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -42492,7 +38312,7 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42503,7 +38323,7 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_61_v2i64: +; AVX1-LABEL: ult_59_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42515,11 +38335,11 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_61_v2i64: +; AVX2-LABEL: ult_59_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42531,20 +38351,20 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_61_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_59_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_61_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_59_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -42552,18 +38372,18 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_61_v2i64: +; BITALG_NOVLX-LABEL: ult_59_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; 
BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_61_v2i64: +; BITALG-LABEL: ult_59_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -42573,13 +38393,13 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_61_v2i64: +define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_59_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -42598,7 +38418,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42609,7 +38429,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_61_v2i64: +; SSE3-LABEL: ugt_59_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -42628,7 +38448,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42639,7 +38459,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_61_v2i64: +; SSSE3-LABEL: ugt_59_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -42654,7 +38474,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42665,7 +38485,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_61_v2i64: +; SSE41-LABEL: ugt_59_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -42680,7 +38500,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483707,2147483707] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42691,7 +38511,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_61_v2i64: +; AVX1-LABEL: ugt_59_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 
= [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42706,7 +38526,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_61_v2i64: +; AVX2-LABEL: ugt_59_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42721,7 +38541,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_61_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_59_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -42729,7 +38549,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_61_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_59_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -42737,7 +38557,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_61_v2i64: +; BITALG_NOVLX-LABEL: ugt_59_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -42747,7 +38567,7 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_61_v2i64: +; BITALG-LABEL: ugt_59_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -42757,13 +38577,13 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_62_v2i64: +define <2 x i64> @ult_60_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_60_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -42782,7 +38602,7 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42793,7 +38613,7 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_62_v2i64: +; SSE3-LABEL: ult_60_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -42812,7 +38632,7 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42823,7 +38643,7 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; 
-; SSSE3-LABEL: ult_62_v2i64: +; SSSE3-LABEL: ult_60_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -42838,7 +38658,7 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42849,7 +38669,7 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_62_v2i64: +; SSE41-LABEL: ult_60_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -42864,7 +38684,7 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42875,7 +38695,7 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_62_v2i64: +; AVX1-LABEL: ult_60_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42887,11 +38707,11 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_62_v2i64: +; AVX2-LABEL: ult_60_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -42903,20 +38723,20 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_62_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_60_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_62_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_60_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -42924,18 +38744,18 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_62_v2i64: +; BITALG_NOVLX-LABEL: ult_60_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def 
$xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_62_v2i64: +; BITALG-LABEL: ult_60_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -42945,13 +38765,13 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_62_v2i64: +define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_60_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -42970,7 +38790,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -42981,7 +38801,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_62_v2i64: +; SSE3-LABEL: ugt_60_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -43000,7 +38820,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43011,7 +38831,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_62_v2i64: +; SSSE3-LABEL: ugt_60_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -43026,7 +38846,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43037,7 +38857,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_62_v2i64: +; SSE41-LABEL: ugt_60_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -43052,7 +38872,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483708,2147483708] ; SSE41-NEXT: movdqa 
%xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43063,7 +38883,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_62_v2i64: +; AVX1-LABEL: ugt_60_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43078,7 +38898,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_62_v2i64: +; AVX2-LABEL: ugt_60_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43093,7 +38913,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_62_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_60_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -43101,7 +38921,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_62_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_60_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -43109,7 +38929,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_62_v2i64: +; BITALG_NOVLX-LABEL: ugt_60_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -43119,7 +38939,7 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_62_v2i64: +; BITALG-LABEL: ugt_60_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -43129,13 +38949,13 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_63_v2i64: +define <2 x i64> @ult_61_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_61_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -43154,7 +38974,7 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43165,7 +38985,7 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_63_v2i64: +; SSE3-LABEL: ult_61_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -43184,7 +39004,7 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [2147483711,2147483711] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43195,7 +39015,7 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_63_v2i64: +; SSSE3-LABEL: ult_61_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -43210,7 +39030,7 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43221,7 +39041,7 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_63_v2i64: +; SSE41-LABEL: ult_61_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -43236,7 +39056,7 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43247,7 +39067,7 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_63_v2i64: +; AVX1-LABEL: ult_61_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43259,11 +39079,11 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_63_v2i64: +; AVX2-LABEL: ult_61_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43275,20 +39095,20 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_63_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_61_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_63_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_61_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; 
AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -43296,18 +39116,18 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_63_v2i64: +; BITALG_NOVLX-LABEL: ult_61_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_63_v2i64: +; BITALG-LABEL: ult_61_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -43317,13 +39137,13 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_63_v2i64: +define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_61_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -43342,7 +39162,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43353,7 +39173,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_63_v2i64: +; SSE3-LABEL: ugt_61_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -43372,7 +39192,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43383,7 +39203,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_63_v2i64: +; SSSE3-LABEL: ugt_61_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -43398,7 +39218,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43409,7 +39229,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_63_v2i64: +; SSE41-LABEL: ugt_61_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -43424,7 +39244,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483709,2147483709] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43435,7 +39255,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_63_v2i64: +; AVX1-LABEL: ugt_61_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43450,7 +39270,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_63_v2i64: +; AVX2-LABEL: ugt_61_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43465,7 +39285,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_63_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_61_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -43473,7 +39293,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_63_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_61_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -43481,7 +39301,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_63_v2i64: +; BITALG_NOVLX-LABEL: ugt_61_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -43491,7 +39311,7 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_63_v2i64: +; BITALG-LABEL: ugt_61_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -43501,13 +39321,13 @@ define <2 x i64> @ugt_63_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_64_v2i64: +define <2 x i64> @ult_62_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_62_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -43526,7 +39346,7 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483712,2147483712] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43537,7 +39357,7 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; 
SSE2-NEXT: retq ; -; SSE3-LABEL: ult_64_v2i64: +; SSE3-LABEL: ult_62_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -43556,7 +39376,7 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483712,2147483712] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43567,7 +39387,7 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_64_v2i64: +; SSSE3-LABEL: ult_62_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -43582,7 +39402,7 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483712,2147483712] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43593,7 +39413,7 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_64_v2i64: +; SSE41-LABEL: ult_62_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -43608,7 +39428,7 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483712,2147483712] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43619,7 +39439,7 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_64_v2i64: +; AVX1-LABEL: ult_62_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43631,11 +39451,11 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_64_v2i64: +; AVX2-LABEL: ult_62_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43647,20 +39467,20 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_64_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_62_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; 
AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_64_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_62_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -43668,18 +39488,18 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_64_v2i64: +; BITALG_NOVLX-LABEL: ult_62_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_64_v2i64: +; BITALG-LABEL: ult_62_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -43689,13 +39509,13 @@ define <2 x i64> @ult_64_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ugt_64_v2i64: +define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ugt_62_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -43714,7 +39534,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483712,2147483712] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43725,7 +39545,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ugt_64_v2i64: +; SSE3-LABEL: ugt_62_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -43744,7 +39564,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483712,2147483712] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] ; SSE3-NEXT: movdqa %xmm0, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43755,7 +39575,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ugt_64_v2i64: +; SSSE3-LABEL: ugt_62_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -43770,7 +39590,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483712,2147483712] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 ; 
SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43781,7 +39601,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ugt_64_v2i64: +; SSE41-LABEL: ugt_62_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -43796,7 +39616,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483712,2147483712] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483710,2147483710] ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43807,7 +39627,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ugt_64_v2i64: +; AVX1-LABEL: ugt_62_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43822,7 +39642,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_64_v2i64: +; AVX2-LABEL: ugt_62_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -43837,7 +39657,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpcmpgtq {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_64_v2i64: +; AVX512VPOPCNTDQ-LABEL: ugt_62_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 @@ -43845,7 +39665,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_64_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_62_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip), %xmm0, %k1 @@ -43853,7 +39673,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_64_v2i64: +; BITALG_NOVLX-LABEL: ugt_62_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -43863,7 +39683,7 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_64_v2i64: +; BITALG-LABEL: ugt_62_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -43873,13 +39693,13 @@ define <2 x i64> @ugt_64_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ugt <2 x i64> %2, + %3 = icmp ugt <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } -define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { -; SSE2-LABEL: ult_65_v2i64: +define <2 x i64> @ult_63_v2i64(<2 x i64> %0) { +; SSE2-LABEL: ult_63_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 @@ -43898,7 +39718,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: psadbw %xmm1, %xmm0 ; SSE2-NEXT: por {{.*}}(%rip), %xmm0 -; 
SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483713,2147483713] +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43909,7 +39729,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: retq ; -; SSE3-LABEL: ult_65_v2i64: +; SSE3-LABEL: ult_63_v2i64: ; SSE3: # %bb.0: ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 @@ -43928,7 +39748,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; SSE3-NEXT: pxor %xmm0, %xmm0 ; SSE3-NEXT: psadbw %xmm1, %xmm0 ; SSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483713,2147483713] +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] ; SSE3-NEXT: movdqa %xmm1, %xmm2 ; SSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43939,7 +39759,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: retq ; -; SSSE3-LABEL: ult_65_v2i64: +; SSSE3-LABEL: ult_63_v2i64: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 @@ -43954,7 +39774,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: pxor %xmm0, %xmm0 ; SSSE3-NEXT: psadbw %xmm3, %xmm0 ; SSSE3-NEXT: por {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483713,2147483713] +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43965,7 +39785,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: ult_65_v2i64: +; SSE41-LABEL: ult_63_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -43980,7 +39800,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; SSE41-NEXT: pxor %xmm0, %xmm0 ; SSE41-NEXT: psadbw %xmm3, %xmm0 ; SSE41-NEXT: por {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483713,2147483713] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483711,2147483711] ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] @@ -43991,7 +39811,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: ult_65_v2i64: +; AVX1-LABEL: ult_63_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -44003,11 +39823,11 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [65,65] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_65_v2i64: +; AVX2-LABEL: ult_63_v2i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -44019,20 +39839,20 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [65,65] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] ; AVX2-NEXT: 
vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_65_v2i64: +; AVX512VPOPCNTDQ-LABEL: ult_63_v2i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [65,65] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX512VPOPCNTDQ-NEXT: vzeroupper ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_65_v2i64: +; AVX512VPOPCNTDQVL-LABEL: ult_63_v2i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip), %xmm0, %k1 @@ -44040,18 +39860,18 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_65_v2i64: +; BITALG_NOVLX-LABEL: ult_63_v2i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [65,65] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; BITALG_NOVLX-NEXT: vzeroupper ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_65_v2i64: +; BITALG-LABEL: ult_63_v2i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %xmm0, %xmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -44061,7 +39881,7 @@ define <2 x i64> @ult_65_v2i64(<2 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0) - %3 = icmp ult <2 x i64> %2, + %3 = icmp ult <2 x i64> %2, %4 = sext <2 x i1> %3 to <2 x i64> ret <2 x i64> %4 } diff --git a/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll index 809c1c5..8e66fd6 100644 --- a/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll @@ -7,200 +7,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG -define <32 x i8> @ult_0_v32i8(<32 x i8> %0) { -; ALL-LABEL: ult_0_v32i8: -; ALL: # %bb.0: -; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; ALL-NEXT: retq - %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0) - %3 = icmp ult <32 x i8> %2, - %4 = sext <32 x i1> %3 to <32 x i8> - ret <32 x i8> %4 -} - -define <32 x i8> @ugt_0_v32i8(<32 x i8> %0) { -; AVX1-LABEL: ugt_0_v32i8: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: 
vpcmpeqb %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_0_v32i8: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v32i8: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_0_v32i8: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_0_v32i8: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_0_v32i8: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0) - %3 = icmp ugt <32 x i8> %2, - %4 = sext <32 x i1> %3 to <32 x i8> - ret <32 x i8> %4 -} - -define <32 x i8> @ult_1_v32i8(<32 x i8> %0) { -; AVX1-LABEL: ult_1_v32i8: -; AVX1: # %bb.0: -; 
AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_1_v32i8: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_1_v32i8: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_1_v32i8: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_1_v32i8: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_1_v32i8: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqb 
%ymm1, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0) - %3 = icmp ult <32 x i8> %2, - %4 = sext <32 x i1> %3 to <32 x i8> - ret <32 x i8> %4 -} - define <32 x i8> @ugt_1_v32i8(<32 x i8> %0) { ; AVX1-LABEL: ugt_1_v32i8: ; AVX1: # %bb.0: @@ -1205,183 +1011,137 @@ define <32 x i8> @ult_7_v32i8(<32 x i8> %0) { ret <32 x i8> %4 } -define <32 x i8> @ugt_7_v32i8(<32 x i8> %0) { -; AVX1-LABEL: ugt_7_v32i8: +define <16 x i16> @ugt_1_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_1_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm3 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpmaxub %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm4 +; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_7_v32i8: +; AVX2-LABEL: ugt_1_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm1 -; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v32i8: +; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; 
AVX512VPOPCNTDQ-NEXT: vpaddw %ymm1, %ymm0, %ymm1 ; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_7_v32i8: +; AVX512VPOPCNTDQVL-LABEL: ugt_1_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm1 ; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_7_v32i8: +; BITALG_NOVLX-LABEL: ugt_1_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_7_v32i8: +; BITALG-LABEL: ugt_1_v16i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %ymm0 +; BITALG-NEXT: vpopcntw %ymm0, %ymm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq - %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0) - %3 = icmp ugt <32 x i8> %2, - %4 = sext <32 x i1> %3 to <32 x i8> - ret <32 x i8> %4 + %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) + %3 = icmp ugt <16 x i16> %2, + %4 = sext <16 x i1> %3 to <16 x i16> + ret <16 x i16> %4 } -define <32 x i8> @ult_8_v32i8(<32 x i8> %0) { -; AVX1-LABEL: ult_8_v32i8: +define <16 x i16> @ult_2_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_2_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: 
vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpminub %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_8_v32i8: +; AVX2-LABEL: ult_2_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm1 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpminub {{.*}}(%rip), %ymm0, %ymm1 -; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v32i8: +; AVX512VPOPCNTDQ-LABEL: ult_2_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512VPOPCNTDQ-NEXT: vpaddw %ymm1, %ymm0, %ymm1 ; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpminub {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_8_v32i8: +; AVX512VPOPCNTDQVL-LABEL: ult_2_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm1 ; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpminub {{.*}}(%rip), %ymm0, %ymm1 -; 
AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_8_v32i8: +; BITALG_NOVLX-LABEL: ult_2_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; BITALG_NOVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_8_v32i8: +; BITALG-LABEL: ult_2_v16i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %ymm0 +; BITALG-NEXT: vpopcntw %ymm0, %ymm0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq - %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0) - %3 = icmp ult <32 x i8> %2, - %4 = sext <32 x i1> %3 to <32 x i8> - ret <32 x i8> %4 + %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) + %3 = icmp ult <16 x i16> %2, + %4 = sext <16 x i1> %3 to <16 x i16> + ret <16 x i16> %4 } -define <32 x i8> @ugt_8_v32i8(<32 x i8> %0) { -; AVX1-LABEL: ugt_8_v32i8: +define <16 x i16> @ugt_2_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_2_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -1391,6 +1151,9 @@ define <32 x i8> @ugt_8_v32i8(<32 x i8> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 ; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 ; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 +; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 ; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 @@ -1398,15 +1161,16 @@ define <32 x i8> @ugt_8_v32i8(<32 x i8> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm3 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpmaxub %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2] +; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_8_v32i8: +; AVX2-LABEL: ugt_2_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1416,59 +1180,49 @@ define <32 x i8> @ugt_8_v32i8(<32 x i8> %0) { ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm1 -; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 +; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 +; 
AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v32i8: +; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_8_v32i8: +; AVX512VPOPCNTDQVL-LABEL: ugt_2_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpmaxub {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_8_v32i8: +; BITALG_NOVLX-LABEL: ugt_2_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtb {{.*}}(%rip), %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_8_v32i8: +; BITALG-LABEL: ugt_2_v16i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %ymm0 +; BITALG-NEXT: vpopcntw %ymm0, %ymm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq - %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0) - %3 = icmp ugt <32 x i8> %2, - %4 = sext <32 x i1> %3 to <32 x i8> - ret <32 x i8> %4 + %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) + %3 = icmp ugt 
<16 x i16> %2, + %4 = sext <16 x i1> %3 to <16 x i16> + ret <16 x i16> %4 } -define <32 x i8> @ult_9_v32i8(<32 x i8> %0) { -; AVX1-LABEL: ult_9_v32i8: +define <16 x i16> @ult_3_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_3_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -1478,6 +1232,9 @@ define <32 x i8> @ult_9_v32i8(<32 x i8> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 ; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 ; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 +; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 ; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 @@ -1485,15 +1242,16 @@ define <32 x i8> @ult_9_v32i8(<32 x i8> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3 -; AVX1-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpminub %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_9_v32i8: +; AVX2-LABEL: ult_3_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1503,71 +1261,53 @@ define <32 x i8> @ult_9_v32i8(<32 x i8> %0) { ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpminub {{.*}}(%rip), %ymm0, %ymm1 -; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 +; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v32i8: +; AVX512VPOPCNTDQ-LABEL: ult_3_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpminub {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = 
[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_9_v32i8: +; AVX512VPOPCNTDQVL-LABEL: ult_3_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpminub {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_9_v32i8: +; BITALG_NOVLX-LABEL: ult_3_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; BITALG_NOVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 +; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_9_v32i8: +; BITALG-LABEL: ult_3_v16i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %ymm0 +; BITALG-NEXT: vpopcntw %ymm0, %ymm0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq - %2 = tail call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %0) - %3 = icmp ult <32 x i8> %2, - %4 = sext <32 x i1> %3 to <32 x i8> - ret <32 x i8> %4 -} - -define <16 x i16> @ult_0_v16i16(<16 x i16> %0) { -; ALL-LABEL: ult_0_v16i16: -; ALL: # %bb.0: -; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; ALL-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_0_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_0_v16i16: +define <16 x i16> @ugt_3_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_3_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -1590,16 +1330,13 @@ define <16 x i16> @ugt_0_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqw 
%xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_0_v16i16: +; AVX2-LABEL: ugt_3_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1612,58 +1349,46 @@ define <16 x i16> @ugt_0_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_0_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_3_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_0_v16i16: +; BITALG_NOVLX-LABEL: ugt_3_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_0_v16i16: +; BITALG-LABEL: ugt_3_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> 
@ult_1_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_1_v16i16: +define <16 x i16> @ult_4_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_4_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -1686,13 +1411,13 @@ define <16 x i16> @ult_1_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_1_v16i16: +; AVX2-LABEL: ult_4_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1705,179 +1430,50 @@ define <16 x i16> @ult_1_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_1_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_4_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_1_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_4_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_1_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_1_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ugt_1_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_1_v16i16: -; 
AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_1_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NEXT: vpaddw %ymm1, %ymm0, %ymm1 -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_1_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm1 -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_1_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_1_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ult_2_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_2_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_2_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm1 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_2_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NEXT: vpaddw %ymm1, %ymm0, %ymm1 -; AVX512VPOPCNTDQ-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %ymm1, %ymm0, 
%ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_2_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm1 -; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_2_v16i16: +; BITALG_NOVLX-LABEL: ult_4_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_2_v16i16: +; BITALG-LABEL: ult_4_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_2_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_2_v16i16: +define <16 x i16> @ugt_4_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_4_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -1900,13 +1496,13 @@ define <16 x i16> @ugt_2_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_2_v16i16: +; AVX2-LABEL: ugt_4_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1922,7 +1518,7 @@ define <16 x i16> @ugt_2_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -1930,7 +1526,7 @@ define <16 x i16> @ugt_2_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_2_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_4_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ 
-1938,27 +1534,27 @@ define <16 x i16> @ugt_2_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_2_v16i16: +; BITALG_NOVLX-LABEL: ugt_4_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_2_v16i16: +; BITALG-LABEL: ugt_4_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_3_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_3_v16i16: +define <16 x i16> @ult_5_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_5_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -1981,13 +1577,13 @@ define <16 x i16> @ult_3_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_3_v16i16: +; AVX2-LABEL: ult_5_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2000,50 +1596,50 @@ define <16 x i16> @ult_3_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_3_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_5_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_3_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_5_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQVL-NEXT: 
vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_3_v16i16: +; BITALG_NOVLX-LABEL: ult_5_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_3_v16i16: +; BITALG-LABEL: ult_5_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_3_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_3_v16i16: +define <16 x i16> @ugt_5_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_5_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2066,13 +1662,13 @@ define <16 x i16> @ugt_3_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_3_v16i16: +; AVX2-LABEL: ugt_5_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2088,7 +1684,7 @@ define <16 x i16> @ugt_3_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -2096,7 +1692,7 @@ define <16 x i16> @ugt_3_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_3_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_5_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -2104,27 +1700,27 @@ define <16 x i16> @ugt_3_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_3_v16i16: +; BITALG_NOVLX-LABEL: ugt_5_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw 
{{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_3_v16i16: +; BITALG-LABEL: ugt_5_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_4_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_4_v16i16: +define <16 x i16> @ult_6_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_6_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2147,13 +1743,13 @@ define <16 x i16> @ult_4_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_4_v16i16: +; AVX2-LABEL: ult_6_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2166,50 +1762,50 @@ define <16 x i16> @ult_4_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_4_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_6_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_4_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_6_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_4_v16i16: +; BITALG_NOVLX-LABEL: ult_6_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = 
[4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_4_v16i16: +; BITALG-LABEL: ult_6_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_4_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_4_v16i16: +define <16 x i16> @ugt_6_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_6_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2232,13 +1828,13 @@ define <16 x i16> @ugt_4_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_4_v16i16: +; AVX2-LABEL: ugt_6_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2254,7 +1850,7 @@ define <16 x i16> @ugt_4_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -2262,7 +1858,7 @@ define <16 x i16> @ugt_4_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_4_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_6_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -2270,27 +1866,27 @@ define <16 x i16> @ugt_4_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_4_v16i16: +; BITALG_NOVLX-LABEL: ugt_6_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_4_v16i16: +; BITALG-LABEL: ugt_6_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = 
sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_5_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_5_v16i16: +define <16 x i16> @ult_7_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_7_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2313,13 +1909,13 @@ define <16 x i16> @ult_5_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_5_v16i16: +; AVX2-LABEL: ult_7_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2332,50 +1928,50 @@ define <16 x i16> @ult_5_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_5_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_7_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_5_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_7_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_5_v16i16: +; BITALG_NOVLX-LABEL: ult_7_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_5_v16i16: +; BITALG-LABEL: ult_7_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: 
retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_5_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_5_v16i16: +define <16 x i16> @ugt_7_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_7_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2398,13 +1994,13 @@ define <16 x i16> @ugt_5_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_5_v16i16: +; AVX2-LABEL: ugt_7_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2420,7 +2016,7 @@ define <16 x i16> @ugt_5_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_7_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -2428,7 +2024,7 @@ define <16 x i16> @ugt_5_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_5_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_7_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -2436,27 +2032,27 @@ define <16 x i16> @ugt_5_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_5_v16i16: +; BITALG_NOVLX-LABEL: ugt_7_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_5_v16i16: +; BITALG-LABEL: ugt_7_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_6_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_6_v16i16: +define <16 x i16> @ult_8_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_8_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2479,13 +2075,13 @@ define <16 x i16> @ult_6_v16i16(<16 
x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_6_v16i16: +; AVX2-LABEL: ult_8_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2498,50 +2094,50 @@ define <16 x i16> @ult_6_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_6_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_8_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_6_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_8_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_6_v16i16: +; BITALG_NOVLX-LABEL: ult_8_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_6_v16i16: +; BITALG-LABEL: ult_8_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_6_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_6_v16i16: +define <16 x i16> @ugt_8_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_8_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2564,13 +2160,13 @@ define <16 x i16> @ugt_6_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_6_v16i16: +; AVX2-LABEL: ugt_8_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2586,7 +2182,7 @@ define <16 x i16> @ugt_6_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_8_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -2594,7 +2190,7 @@ define <16 x i16> @ugt_6_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_6_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_8_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -2602,27 +2198,27 @@ define <16 x i16> @ugt_6_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_6_v16i16: +; BITALG_NOVLX-LABEL: ugt_8_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_6_v16i16: +; BITALG-LABEL: ugt_8_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_7_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_7_v16i16: +define <16 x i16> @ult_9_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_9_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2645,13 +2241,13 @@ define <16 x i16> @ult_7_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; 
AVX2-LABEL: ult_7_v16i16: +; AVX2-LABEL: ult_9_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2664,50 +2260,50 @@ define <16 x i16> @ult_7_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_7_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_9_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_7_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_9_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_7_v16i16: +; BITALG_NOVLX-LABEL: ult_9_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_7_v16i16: +; BITALG-LABEL: ult_9_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_7_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_7_v16i16: +define <16 x i16> @ugt_9_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_9_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2730,13 +2326,13 @@ define <16 x i16> @ugt_7_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] ; AVX1-NEXT: vpcmpgtw 
%xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_7_v16i16: +; AVX2-LABEL: ugt_9_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2752,7 +2348,7 @@ define <16 x i16> @ugt_7_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_9_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -2760,7 +2356,7 @@ define <16 x i16> @ugt_7_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_7_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_9_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -2768,27 +2364,27 @@ define <16 x i16> @ugt_7_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_7_v16i16: +; BITALG_NOVLX-LABEL: ugt_9_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_7_v16i16: +; BITALG-LABEL: ugt_9_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_8_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_8_v16i16: +define <16 x i16> @ult_10_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_10_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2811,13 +2407,13 @@ define <16 x i16> @ult_8_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_8_v16i16: +; AVX2-LABEL: ult_10_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2830,50 +2426,50 @@ define <16 x i16> @ult_8_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; 
AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_10_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_8_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_10_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_8_v16i16: +; BITALG_NOVLX-LABEL: ult_10_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_8_v16i16: +; BITALG-LABEL: ult_10_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_8_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_8_v16i16: +define <16 x i16> @ugt_10_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_10_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2896,13 +2492,13 @@ define <16 x i16> @ugt_8_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_8_v16i16: +; AVX2-LABEL: ugt_10_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2918,7 +2514,7 @@ define <16 x i16> @ugt_8_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_10_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -2926,7 +2522,7 @@ define <16 x i16> @ugt_8_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_8_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_10_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -2934,27 +2530,27 @@ define <16 x i16> @ugt_8_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_8_v16i16: +; BITALG_NOVLX-LABEL: ugt_10_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_8_v16i16: +; BITALG-LABEL: ugt_10_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_9_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_9_v16i16: +define <16 x i16> @ult_11_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_11_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -2977,13 +2573,13 @@ define <16 x i16> @ult_9_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_9_v16i16: +; AVX2-LABEL: ult_11_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2996,50 +2592,50 @@ define <16 x i16> @ult_9_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; 
AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_11_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_9_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_11_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_9_v16i16: +; BITALG_NOVLX-LABEL: ult_11_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_9_v16i16: +; BITALG-LABEL: ult_11_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_9_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_9_v16i16: +define <16 x i16> @ugt_11_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_11_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3062,13 +2658,13 @@ define <16 x i16> @ugt_9_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_9_v16i16: +; AVX2-LABEL: ugt_11_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3084,7 +2680,7 @@ define <16 x i16> @ugt_9_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_9_v16i16: 
+; AVX512VPOPCNTDQ-LABEL: ugt_11_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -3092,7 +2688,7 @@ define <16 x i16> @ugt_9_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_9_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_11_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -3100,27 +2696,27 @@ define <16 x i16> @ugt_9_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_9_v16i16: +; BITALG_NOVLX-LABEL: ugt_11_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_9_v16i16: +; BITALG-LABEL: ugt_11_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_10_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_10_v16i16: +define <16 x i16> @ult_12_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_12_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3143,13 +2739,13 @@ define <16 x i16> @ult_10_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_10_v16i16: +; AVX2-LABEL: ult_12_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3162,50 +2758,50 @@ define <16 x i16> @ult_10_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_10_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_12_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_10_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_12_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_10_v16i16: +; BITALG_NOVLX-LABEL: ult_12_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_10_v16i16: +; BITALG-LABEL: ult_12_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_10_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_10_v16i16: +define <16 x i16> @ugt_12_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_12_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3228,13 +2824,13 @@ define <16 x i16> @ugt_10_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_10_v16i16: +; AVX2-LABEL: ugt_12_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3250,7 +2846,7 @@ define <16 x i16> @ugt_10_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_10_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_12_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} 
zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -3258,7 +2854,7 @@ define <16 x i16> @ugt_10_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_10_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_12_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -3266,27 +2862,27 @@ define <16 x i16> @ugt_10_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_10_v16i16: +; BITALG_NOVLX-LABEL: ugt_12_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_10_v16i16: +; BITALG-LABEL: ugt_12_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_11_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_11_v16i16: +define <16 x i16> @ult_13_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_13_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3309,13 +2905,13 @@ define <16 x i16> @ult_11_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_11_v16i16: +; AVX2-LABEL: ult_13_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3328,50 +2924,50 @@ define <16 x i16> @ult_11_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_11_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_13_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; 
AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_11_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_13_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_11_v16i16: +; BITALG_NOVLX-LABEL: ult_13_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_11_v16i16: +; BITALG-LABEL: ult_13_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_11_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_11_v16i16: +define <16 x i16> @ugt_13_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_13_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3394,13 +2990,13 @@ define <16 x i16> @ugt_11_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_11_v16i16: +; AVX2-LABEL: ugt_13_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3416,7 +3012,7 @@ define <16 x i16> @ugt_11_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_11_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_13_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -3424,7 +3020,7 @@ define <16 x i16> @ugt_11_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_11_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_13_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -3432,27 +3028,27 @@ define <16 x i16> @ugt_11_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_11_v16i16: +; BITALG_NOVLX-LABEL: ugt_13_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_11_v16i16: +; BITALG-LABEL: ugt_13_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_12_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_12_v16i16: +define <16 x i16> @ult_14_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_14_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3475,13 +3071,13 @@ define <16 x i16> @ult_12_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_12_v16i16: +; AVX2-LABEL: ult_14_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3494,50 +3090,50 @@ define <16 x i16> @ult_12_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_12_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_14_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; 
AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_12_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_14_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_12_v16i16: +; BITALG_NOVLX-LABEL: ult_14_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_12_v16i16: +; BITALG-LABEL: ult_14_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_12_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_12_v16i16: +define <16 x i16> @ugt_14_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ugt_14_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3560,13 +3156,13 @@ define <16 x i16> @ugt_12_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_12_v16i16: +; AVX2-LABEL: ugt_14_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3582,7 +3178,7 @@ define <16 x i16> @ugt_12_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_12_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_14_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -3590,7 +3186,7 @@ define <16 x i16> @ugt_12_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_12_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_14_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 @@ -3598,27 +3194,27 @@ define <16 x i16> @ugt_12_v16i16(<16 x i16> %0) { ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_12_v16i16: +; BITALG_NOVLX-LABEL: ugt_14_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_12_v16i16: +; BITALG-LABEL: ugt_14_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, + %3 = icmp ugt <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ult_13_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_13_v16i16: +define <16 x i16> @ult_15_v16i16(<16 x i16> %0) { +; AVX1-LABEL: ult_15_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -3641,13 +3237,13 @@ define <16 x i16> @ult_13_v16i16(<16 x i16> %0) { ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_13_v16i16: +; AVX2-LABEL: ult_15_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3660,1854 +3256,150 @@ define <16 x i16> @ult_13_v16i16(<16 x i16> %0) { ; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_13_v16i16: +; AVX512VPOPCNTDQ-LABEL: ult_15_v16i16: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; 
AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] +; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_13_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ult_15_v16i16: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] +; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_13_v16i16: +; BITALG_NOVLX-LABEL: ult_15_v16i16: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] +; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_13_v16i16: +; BITALG-LABEL: ult_15_v16i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %ymm0, %ymm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, + %3 = icmp ult <16 x i16> %2, %4 = sext <16 x i1> %3 to <16 x i16> ret <16 x i16> %4 } -define <16 x i16> @ugt_13_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_13_v16i16: +define <8 x i32> @ugt_1_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_1_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13] -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm3, 
%xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm4 +; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_13_v16i16: +; AVX2-LABEL: ugt_1_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_13_v16i16: +; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_13_v16i16: +; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_13_v16i16: +; BITALG_NOVLX-LABEL: ugt_1_v8i32: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; BITALG_NOVLX-NEXT: vpaddd %ymm1, %ymm0, %ymm1 +; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_13_v16i16: +; BITALG-LABEL: 
ugt_1_v8i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 +; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; BITALG-NEXT: vpaddd %ymm1, %ymm0, %ymm1 +; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0 +; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 +; BITALG-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 ; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ult_14_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_14_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_14_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] -; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_14_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_14_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_14_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] -; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_14_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ugt_14_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_14_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14] -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_14_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_14_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 
-; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_14_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_14_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_14_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ult_15_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_15_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_15_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_15_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_15_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_15_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_15_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ugt_15_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_15_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_15_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, 
%ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_15_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_15_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_15_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_15_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ult_16_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_16_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_16_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, 
%ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_16_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_16_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_16_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_16_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ugt_16_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ugt_16_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 
$1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_16_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_16_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_16_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_16_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpcmpgtw {{.*}}(%rip), %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_16_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ugt <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <16 x i16> @ult_17_v16i16(<16 x i16> %0) { -; AVX1-LABEL: ult_17_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1 -; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17] -; AVX1-NEXT: vpcmpgtw 
%xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_17_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17] -; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_17_v16i16: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_17_v16i16: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17] -; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_17_v16i16: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17] -; BITALG_NOVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_17_v16i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %ymm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %0) - %3 = icmp ult <16 x i16> %2, - %4 = sext <16 x i1> %3 to <16 x i16> - ret <16 x i16> %4 -} - -define <8 x i32> @ult_0_v8i32(<8 x i32> %0) { -; ALL-LABEL: ult_0_v8i32: -; ALL: # %bb.0: -; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; ALL-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ugt_0_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_0_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 
-; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpxor %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_0_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_0_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_0_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_0_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ult_1_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_1_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_1_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: 
vpcmpeqd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_1_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_1_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_1_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_1_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ugt_1_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_1_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_1_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: 
vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_1_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; BITALG_NOVLX-NEXT: vpaddd %ymm1, %ymm0, %ymm1 -; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_1_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; BITALG-NEXT: vpaddd %ymm1, %ymm0, %ymm1 -; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ult_2_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_2_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_2_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_2_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_2_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_2_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; BITALG_NOVLX-NEXT: vpaddd %ymm1, %ymm0, %ymm1 -; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_2_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; BITALG-NEXT: vpaddd %ymm1, %ymm0, %ymm1 -; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ugt_2_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_2_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2] -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_2_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2] -; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_2_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, 
%ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2] -; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_2_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ult_3_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_3_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_3_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] -; AVX2-NEXT: 
vpcmpgtd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_3_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_3_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] -; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_3_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ugt_3_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_3_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; 
AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3] -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_3_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] -; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_3_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_3_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] -; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_3_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - 
-define <8 x i32> @ult_4_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_4_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_4_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] -; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_4_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_4_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = 
ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] -; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_4_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ugt_4_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_4_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4] -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_4_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, 
%ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] -; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_4_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] -; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_4_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ult_5_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_5_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, 
%xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_5_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] -; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_5_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ult_5_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] -; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ult_5_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 
-; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ugt_5_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_5_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5] -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_5_v8i32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] -; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i32: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; 
BITALG_NOVLX-LABEL: ugt_5_v8i32: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] -; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_5_v8i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, - %4 = sext <8 x i1> %3 to <8 x i32> - ret <8 x i32> %4 -} - -define <8 x i32> @ult_6_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_6_v8i32: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6] -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 + %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) + %3 = icmp ugt <8 x i32> %2, + %4 = sext <8 x i1> %3 to <8 x i32> + ret <8 x i32> %4 +} + +define <8 x i32> @ult_2_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_2_v8i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: 
vpcmpeqd %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_6_v8i32: +; AVX2-LABEL: ult_2_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm1 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] -; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_6_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_2_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_2_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -5515,41 +3407,31 @@ define <8 x i32> @ult_6_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_6_v8i32: +; BITALG_NOVLX-LABEL: ult_2_v8i32: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; BITALG_NOVLX-NEXT: vpaddd %ymm1, %ymm0, %ymm1 +; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] -; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_6_v8i32: +; BITALG-LABEL: ult_2_v8i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 +; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; BITALG-NEXT: vpaddd %ymm1, %ymm0, %ymm1 +; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = 
ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; BITALG-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_6_v8i32: +define <8 x i32> @ugt_2_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_2_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5577,13 +3459,13 @@ define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2,2,2] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_6_v8i32: +; AVX2-LABEL: ugt_2_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -5599,19 +3481,19 @@ define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -5619,7 +3501,7 @@ define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_6_v8i32: +; BITALG_NOVLX-LABEL: ugt_2_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -5629,11 +3511,11 @@ define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] +; BITALG_NOVLX-NEXT: 
vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_6_v8i32: +; BITALG-LABEL: ugt_2_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5647,13 +3529,13 @@ define <8 x i32> @ugt_6_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_7_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_7_v8i32: +define <8 x i32> @ult_3_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_3_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5681,13 +3563,13 @@ define <8 x i32> @ult_7_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_7_v8i32: +; AVX2-LABEL: ult_3_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -5703,19 +3585,19 @@ define <8 x i32> @ult_7_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_7_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_3_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -5723,7 +3605,7 @@ define <8 x i32> @ult_7_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_7_v8i32: +; BITALG_NOVLX-LABEL: ult_3_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -5733,11 +3615,11 @@ define <8 x i32> @ult_7_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; 
BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_7_v8i32: +; BITALG-LABEL: ult_3_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5751,13 +3633,13 @@ define <8 x i32> @ult_7_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_7_v8i32: +define <8 x i32> @ugt_3_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_3_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5785,13 +3667,13 @@ define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3,3,3] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_7_v8i32: +; AVX2-LABEL: ugt_3_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -5807,19 +3689,19 @@ define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_3_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -5827,7 +3709,7 @@ define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_7_v8i32: +; BITALG_NOVLX-LABEL: ugt_3_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -5837,11 +3719,11 @@ define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_7_v8i32: +; BITALG-LABEL: ugt_3_v8i32: ; BITALG: # %bb.0: 
; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5855,13 +3737,13 @@ define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_8_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_8_v8i32: +define <8 x i32> @ult_4_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_4_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5889,13 +3771,13 @@ define <8 x i32> @ult_8_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_8_v8i32: +; AVX2-LABEL: ult_4_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -5911,19 +3793,19 @@ define <8 x i32> @ult_8_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_4_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -5931,7 +3813,7 @@ define <8 x i32> @ult_8_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_8_v8i32: +; BITALG_NOVLX-LABEL: ult_4_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -5941,11 +3823,11 @@ define <8 x i32> @ult_8_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_8_v8i32: +; BITALG-LABEL: ult_4_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5959,13 +3841,13 @@ 
define <8 x i32> @ult_8_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_8_v8i32: +define <8 x i32> @ugt_4_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_4_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -5993,13 +3875,13 @@ define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_8_v8i32: +; AVX2-LABEL: ugt_4_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6015,19 +3897,19 @@ define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6035,7 +3917,7 @@ define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_8_v8i32: +; BITALG_NOVLX-LABEL: ugt_4_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6045,11 +3927,11 @@ define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_8_v8i32: +; BITALG-LABEL: ugt_4_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6063,13 +3945,13 @@ define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; 
BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_9_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_9_v8i32: +define <8 x i32> @ult_5_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_5_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6097,13 +3979,13 @@ define <8 x i32> @ult_9_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_9_v8i32: +; AVX2-LABEL: ult_5_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6119,19 +4001,19 @@ define <8 x i32> @ult_9_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_5_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6139,7 +4021,7 @@ define <8 x i32> @ult_9_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_9_v8i32: +; BITALG_NOVLX-LABEL: ult_5_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6149,11 +4031,11 @@ define <8 x i32> @ult_9_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_9_v8i32: +; BITALG-LABEL: ult_5_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6167,13 +4049,13 @@ define <8 x i32> @ult_9_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + 
%3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_9_v8i32: +define <8 x i32> @ugt_5_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_5_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6201,13 +4083,13 @@ define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5,5,5] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_9_v8i32: +; AVX2-LABEL: ugt_5_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6223,19 +4105,19 @@ define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6243,7 +4125,7 @@ define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_9_v8i32: +; BITALG_NOVLX-LABEL: ugt_5_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6253,11 +4135,11 @@ define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_9_v8i32: +; BITALG-LABEL: ugt_5_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6271,13 +4153,13 @@ define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> 
@ult_10_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_10_v8i32: +define <8 x i32> @ult_6_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_6_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6305,13 +4187,13 @@ define <8 x i32> @ult_10_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_10_v8i32: +; AVX2-LABEL: ult_6_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6327,19 +4209,19 @@ define <8 x i32> @ult_10_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_10_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_6_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6347,7 +4229,7 @@ define <8 x i32> @ult_10_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_10_v8i32: +; BITALG_NOVLX-LABEL: ult_6_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6357,11 +4239,11 @@ define <8 x i32> @ult_10_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_10_v8i32: +; BITALG-LABEL: ult_6_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6375,13 +4257,13 @@ define <8 x i32> @ult_10_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_10_v8i32: +define 
<8 x i32> @ugt_6_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_6_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6409,13 +4291,13 @@ define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6,6,6] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_10_v8i32: +; AVX2-LABEL: ugt_6_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6431,19 +4313,19 @@ define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6451,7 +4333,7 @@ define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_10_v8i32: +; BITALG_NOVLX-LABEL: ugt_6_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6461,11 +4343,11 @@ define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_10_v8i32: +; BITALG-LABEL: ugt_6_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6479,13 +4361,13 @@ define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_11_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_11_v8i32: +define <8 x i32> @ult_7_v8i32(<8 x i32> %0) { +; AVX1-LABEL: 
ult_7_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6513,13 +4395,13 @@ define <8 x i32> @ult_11_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_11_v8i32: +; AVX2-LABEL: ult_7_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6535,19 +4417,19 @@ define <8 x i32> @ult_11_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_11_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_7_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6555,7 +4437,7 @@ define <8 x i32> @ult_11_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_11_v8i32: +; BITALG_NOVLX-LABEL: ult_7_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6565,11 +4447,11 @@ define <8 x i32> @ult_11_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_11_v8i32: +; BITALG-LABEL: ult_7_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6583,13 +4465,13 @@ define <8 x i32> @ult_11_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_11_v8i32: +define <8 x i32> @ugt_7_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_7_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6617,13 +4499,13 @@ define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_11_v8i32: +; AVX2-LABEL: ugt_7_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6639,19 +4521,19 @@ define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6659,7 +4541,7 @@ define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_11_v8i32: +; BITALG_NOVLX-LABEL: ugt_7_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6669,11 +4551,11 @@ define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_11_v8i32: +; BITALG-LABEL: ugt_7_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6687,13 +4569,13 @@ define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_12_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_12_v8i32: +define <8 x i32> @ult_8_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_8_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: 
vpand %xmm1, %xmm0, %xmm2 @@ -6721,13 +4603,13 @@ define <8 x i32> @ult_12_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_12_v8i32: +; AVX2-LABEL: ult_8_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6743,19 +4625,19 @@ define <8 x i32> @ult_12_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_12_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_8_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6763,7 +4645,7 @@ define <8 x i32> @ult_12_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_12_v8i32: +; BITALG_NOVLX-LABEL: ult_8_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6773,11 +4655,11 @@ define <8 x i32> @ult_12_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_12_v8i32: +; BITALG-LABEL: ult_8_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6791,13 +4673,13 @@ define <8 x i32> @ult_12_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_12_v8i32: +define <8 x i32> @ugt_8_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_8_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6825,13 +4707,13 @@ define <8 x i32> 
@ugt_12_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_12_v8i32: +; AVX2-LABEL: ugt_8_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6847,19 +4729,19 @@ define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6867,7 +4749,7 @@ define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_12_v8i32: +; BITALG_NOVLX-LABEL: ugt_8_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6877,11 +4759,11 @@ define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_12_v8i32: +; BITALG-LABEL: ugt_8_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6895,13 +4777,13 @@ define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_13_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_13_v8i32: +define <8 x i32> @ult_9_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_9_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -6929,13 +4811,13 @@ define <8 x i32> @ult_13_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} 
xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_13_v8i32: +; AVX2-LABEL: ult_9_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -6951,19 +4833,19 @@ define <8 x i32> @ult_13_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_13_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_9_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -6971,7 +4853,7 @@ define <8 x i32> @ult_13_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_13_v8i32: +; BITALG_NOVLX-LABEL: ult_9_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -6981,11 +4863,11 @@ define <8 x i32> @ult_13_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_13_v8i32: +; BITALG-LABEL: ult_9_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6999,13 +4881,13 @@ define <8 x i32> @ult_13_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_13_v8i32: +define <8 x i32> @ugt_9_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_9_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7033,13 +4915,13 @@ define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, 
%xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9,9,9] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_13_v8i32: +; AVX2-LABEL: ugt_9_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7055,19 +4937,19 @@ define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7075,7 +4957,7 @@ define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_13_v8i32: +; BITALG_NOVLX-LABEL: ugt_9_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7085,11 +4967,11 @@ define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_13_v8i32: +; BITALG-LABEL: ugt_9_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7103,13 +4985,13 @@ define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_14_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_14_v8i32: +define <8 x i32> @ult_10_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_10_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7137,13 +5019,13 @@ define <8 x i32> @ult_14_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; 
AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_14_v8i32: +; AVX2-LABEL: ult_10_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7159,19 +5041,19 @@ define <8 x i32> @ult_14_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_14_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_10_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7179,7 +5061,7 @@ define <8 x i32> @ult_14_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_14_v8i32: +; BITALG_NOVLX-LABEL: ult_10_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7189,11 +5071,11 @@ define <8 x i32> @ult_14_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_14_v8i32: +; BITALG-LABEL: ult_10_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7207,13 +5089,13 @@ define <8 x i32> @ult_14_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_14_v8i32: +define <8 x i32> @ugt_10_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_10_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7241,13 +5123,13 @@ define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} 
xmm1 = [14,14,14,14] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10,10,10] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_14_v8i32: +; AVX2-LABEL: ugt_10_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7263,19 +5145,19 @@ define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7283,7 +5165,7 @@ define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_14_v8i32: +; BITALG_NOVLX-LABEL: ugt_10_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7293,11 +5175,11 @@ define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_14_v8i32: +; BITALG-LABEL: ugt_10_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7311,13 +5193,13 @@ define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_15_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_15_v8i32: +define <8 x i32> @ult_11_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_11_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7345,13 +5227,13 @@ define <8 x i32> @ult_15_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15] +; 
AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11,11,11] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_15_v8i32: +; AVX2-LABEL: ult_11_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7367,19 +5249,19 @@ define <8 x i32> @ult_15_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_15_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_11_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7387,7 +5269,7 @@ define <8 x i32> @ult_15_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_15_v8i32: +; BITALG_NOVLX-LABEL: ult_11_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7397,11 +5279,11 @@ define <8 x i32> @ult_15_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_15_v8i32: +; BITALG-LABEL: ult_11_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7415,13 +5297,13 @@ define <8 x i32> @ult_15_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_15_v8i32: +define <8 x i32> @ugt_11_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_11_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7449,13 +5331,13 @@ define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15] +; AVX1-NEXT: vmovdqa {{.*#+}} 
xmm1 = [11,11,11,11] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_15_v8i32: +; AVX2-LABEL: ugt_11_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7471,19 +5353,19 @@ define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_15_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_15_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7491,7 +5373,7 @@ define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_15_v8i32: +; BITALG_NOVLX-LABEL: ugt_11_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7501,11 +5383,11 @@ define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_15_v8i32: +; BITALG-LABEL: ugt_11_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7519,13 +5401,13 @@ define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_16_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_16_v8i32: +define <8 x i32> @ult_12_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_12_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7553,13 +5435,13 @@ define <8 x i32> @ult_16_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12] ; 
AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_16_v8i32: +; AVX2-LABEL: ult_12_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7575,19 +5457,19 @@ define <8 x i32> @ult_16_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_16_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_12_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_16_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7595,7 +5477,7 @@ define <8 x i32> @ult_16_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_16_v8i32: +; BITALG_NOVLX-LABEL: ult_12_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7605,11 +5487,11 @@ define <8 x i32> @ult_16_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_16_v8i32: +; BITALG-LABEL: ult_12_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7623,13 +5505,13 @@ define <8 x i32> @ult_16_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_16_v8i32: +define <8 x i32> @ugt_12_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_12_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7657,13 +5539,13 @@ define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12,12,12] ; AVX1-NEXT: vpcmpgtd %xmm1, 
%xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_16_v8i32: +; AVX2-LABEL: ugt_12_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7679,19 +5561,19 @@ define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_16_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_16_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7699,7 +5581,7 @@ define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_16_v8i32: +; BITALG_NOVLX-LABEL: ugt_12_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7709,11 +5591,11 @@ define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_16_v8i32: +; BITALG-LABEL: ugt_12_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7727,13 +5609,13 @@ define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_17_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_17_v8i32: +define <8 x i32> @ult_13_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_13_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7761,13 +5643,13 @@ define <8 x i32> @ult_17_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: 
vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_17_v8i32: +; AVX2-LABEL: ult_13_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7783,19 +5665,19 @@ define <8 x i32> @ult_17_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_17_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_13_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_17_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7803,7 +5685,7 @@ define <8 x i32> @ult_17_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_17_v8i32: +; BITALG_NOVLX-LABEL: ult_13_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7813,11 +5695,11 @@ define <8 x i32> @ult_17_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_17_v8i32: +; BITALG-LABEL: ult_13_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7831,13 +5713,13 @@ define <8 x i32> @ult_17_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_17_v8i32: +define <8 x i32> @ugt_13_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_13_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7865,13 +5747,13 @@ define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13,13,13] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; 
AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_17_v8i32: +; AVX2-LABEL: ugt_13_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7887,19 +5769,19 @@ define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_17_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_17_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -7907,7 +5789,7 @@ define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_17_v8i32: +; BITALG_NOVLX-LABEL: ugt_13_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -7917,11 +5799,11 @@ define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_17_v8i32: +; BITALG-LABEL: ugt_13_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7935,13 +5817,13 @@ define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_18_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_18_v8i32: +define <8 x i32> @ult_14_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_14_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -7969,13 +5851,13 @@ define <8 x i32> @ult_18_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18,18,18] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, 
%xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_18_v8i32: +; AVX2-LABEL: ult_14_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -7991,19 +5873,19 @@ define <8 x i32> @ult_18_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_18_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_14_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_18_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8011,7 +5893,7 @@ define <8 x i32> @ult_18_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_18_v8i32: +; BITALG_NOVLX-LABEL: ult_14_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8021,11 +5903,11 @@ define <8 x i32> @ult_18_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_18_v8i32: +; BITALG-LABEL: ult_14_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8039,13 +5921,13 @@ define <8 x i32> @ult_18_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_18_v8i32: +define <8 x i32> @ugt_14_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_14_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8073,13 +5955,13 @@ define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18,18,18] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14,14,14] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; 
AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_18_v8i32: +; AVX2-LABEL: ugt_14_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8095,19 +5977,19 @@ define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_18_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_18_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8115,7 +5997,7 @@ define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_18_v8i32: +; BITALG_NOVLX-LABEL: ugt_14_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8125,11 +6007,11 @@ define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_18_v8i32: +; BITALG-LABEL: ugt_14_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8143,13 +6025,13 @@ define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_19_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_19_v8i32: +define <8 x i32> @ult_15_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_15_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8177,13 +6059,13 @@ define <8 x i32> @ult_19_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19,19,19] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; 
AVX2-LABEL: ult_19_v8i32: +; AVX2-LABEL: ult_15_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8199,19 +6081,19 @@ define <8 x i32> @ult_19_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_19_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_15_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_19_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8219,7 +6101,7 @@ define <8 x i32> @ult_19_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_19_v8i32: +; BITALG_NOVLX-LABEL: ult_15_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8229,11 +6111,11 @@ define <8 x i32> @ult_19_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_19_v8i32: +; BITALG-LABEL: ult_15_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8247,13 +6129,13 @@ define <8 x i32> @ult_19_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_19_v8i32: +define <8 x i32> @ugt_15_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_15_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8281,13 +6163,13 @@ define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19,19,19] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_19_v8i32: +; 
AVX2-LABEL: ugt_15_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8303,19 +6185,19 @@ define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_19_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_15_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_19_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_15_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8323,7 +6205,7 @@ define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_19_v8i32: +; BITALG_NOVLX-LABEL: ugt_15_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8333,11 +6215,11 @@ define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_19_v8i32: +; BITALG-LABEL: ugt_15_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8351,13 +6233,13 @@ define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_20_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_20_v8i32: +define <8 x i32> @ult_16_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_16_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8385,13 +6267,13 @@ define <8 x i32> @ult_20_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20,20,20] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_20_v8i32: +; AVX2-LABEL: ult_16_v8i32: ; 
AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8407,19 +6289,19 @@ define <8 x i32> @ult_20_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_20_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_16_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_20_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_16_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8427,7 +6309,7 @@ define <8 x i32> @ult_20_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_20_v8i32: +; BITALG_NOVLX-LABEL: ult_16_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8437,11 +6319,11 @@ define <8 x i32> @ult_20_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_20_v8i32: +; BITALG-LABEL: ult_16_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8455,13 +6337,13 @@ define <8 x i32> @ult_20_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_20_v8i32: +define <8 x i32> @ugt_16_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_16_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8489,13 +6371,13 @@ define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20,20,20] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_20_v8i32: +; AVX2-LABEL: ugt_16_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: 
vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8511,19 +6393,19 @@ define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_20_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_16_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_20_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_16_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8531,7 +6413,7 @@ define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_20_v8i32: +; BITALG_NOVLX-LABEL: ugt_16_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8541,11 +6423,11 @@ define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_20_v8i32: +; BITALG-LABEL: ugt_16_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8559,13 +6441,13 @@ define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_21_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_21_v8i32: +define <8 x i32> @ult_17_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_17_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8593,13 +6475,13 @@ define <8 x i32> @ult_21_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21,21,21] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_21_v8i32: +; AVX2-LABEL: ult_17_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8615,19 +6497,19 @@ define <8 x i32> @ult_21_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_21_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_17_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_21_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_17_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8635,7 +6517,7 @@ define <8 x i32> @ult_21_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_21_v8i32: +; BITALG_NOVLX-LABEL: ult_17_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8645,11 +6527,11 @@ define <8 x i32> @ult_21_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_21_v8i32: +; BITALG-LABEL: ult_17_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8663,13 +6545,13 @@ define <8 x i32> @ult_21_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_21_v8i32: +define <8 x i32> @ugt_17_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_17_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8697,13 +6579,13 @@ define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21,21,21] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17,17,17] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_21_v8i32: +; AVX2-LABEL: ugt_17_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8719,19 +6601,19 @@ define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_21_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_17_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_21_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_17_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8739,7 +6621,7 @@ define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_21_v8i32: +; BITALG_NOVLX-LABEL: ugt_17_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8749,11 +6631,11 @@ define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_21_v8i32: +; BITALG-LABEL: ugt_17_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8767,13 +6649,13 @@ define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_22_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_22_v8i32: +define <8 x i32> @ult_18_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_18_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8801,13 +6683,13 @@ define <8 x i32> @ult_22_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22,22,22] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18,18,18] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_22_v8i32: +; AVX2-LABEL: ult_18_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8823,19 +6705,19 @@ define <8 x i32> @ult_22_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_22_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_18_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_22_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_18_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8843,7 +6725,7 @@ define <8 x i32> @ult_22_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_22_v8i32: +; BITALG_NOVLX-LABEL: ult_18_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8853,11 +6735,11 @@ define <8 x i32> @ult_22_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_22_v8i32: +; BITALG-LABEL: ult_18_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8871,13 +6753,13 @@ define <8 x i32> @ult_22_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_22_v8i32: +define <8 x i32> @ugt_18_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_18_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -8905,13 +6787,13 @@ define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22,22,22] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18,18,18] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_22_v8i32: +; AVX2-LABEL: ugt_18_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -8927,19 +6809,19 @@ define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_22_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_18_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_22_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_18_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -8947,7 +6829,7 @@ define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_22_v8i32: +; BITALG_NOVLX-LABEL: ugt_18_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -8957,11 +6839,11 @@ define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [18,18,18,18,18,18,18,18] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_22_v8i32: +; BITALG-LABEL: ugt_18_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8975,13 +6857,13 @@ define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_23_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_23_v8i32: +define <8 x i32> @ult_19_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_19_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9009,13 +6891,13 @@ define <8 x i32> @ult_23_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23,23,23] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19,19,19] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_23_v8i32: +; AVX2-LABEL: ult_19_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9031,19 +6913,19 @@ define <8 x i32> @ult_23_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_23_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_19_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_23_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_19_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9051,7 +6933,7 @@ define <8 x i32> @ult_23_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_23_v8i32: +; BITALG_NOVLX-LABEL: ult_19_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9061,11 +6943,11 @@ define <8 x i32> @ult_23_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_23_v8i32: +; BITALG-LABEL: ult_19_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9079,13 +6961,13 @@ define <8 x i32> @ult_23_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_23_v8i32: +define <8 x i32> @ugt_19_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_19_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9113,13 +6995,13 @@ define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23,23,23] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19,19,19] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_23_v8i32: +; AVX2-LABEL: ugt_19_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9135,19 +7017,19 @@ define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_23_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_19_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_23_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_19_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9155,7 +7037,7 @@ define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_23_v8i32: +; BITALG_NOVLX-LABEL: ugt_19_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9165,11 +7047,11 @@ define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [19,19,19,19,19,19,19,19] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_23_v8i32: +; BITALG-LABEL: ugt_19_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9183,13 +7065,13 @@ define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_24_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_24_v8i32: +define <8 x i32> @ult_20_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_20_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9217,13 +7099,13 @@ define <8 x i32> @ult_24_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24,24,24] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20,20,20] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_24_v8i32: +; AVX2-LABEL: ult_20_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9239,19 +7121,19 @@ define <8 x i32> @ult_24_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_24_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_20_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_24_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_20_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9259,7 +7141,7 @@ define <8 x i32> @ult_24_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_24_v8i32: +; BITALG_NOVLX-LABEL: ult_20_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9269,11 +7151,11 @@ define <8 x i32> @ult_24_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_24_v8i32: +; BITALG-LABEL: ult_20_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9287,13 +7169,13 @@ define <8 x i32> @ult_24_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_24_v8i32: +define <8 x i32> @ugt_20_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_20_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9321,13 +7203,13 @@ define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24,24,24] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20,20,20] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_24_v8i32: +; AVX2-LABEL: ugt_20_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9343,19 +7225,19 @@ define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_24_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_20_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_24_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_20_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9363,7 +7245,7 @@ define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_24_v8i32: +; BITALG_NOVLX-LABEL: ugt_20_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9373,11 +7255,11 @@ define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [20,20,20,20,20,20,20,20] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_24_v8i32: +; BITALG-LABEL: ugt_20_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9391,13 +7273,13 @@ define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_25_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_25_v8i32: +define <8 x i32> @ult_21_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_21_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9425,13 +7307,13 @@ define <8 x i32> @ult_25_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25,25,25] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21,21,21] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_25_v8i32: +; AVX2-LABEL: ult_21_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9447,19 +7329,19 @@ define <8 x i32> @ult_25_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_25_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_21_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_25_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_21_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9467,7 +7349,7 @@ define <8 x i32> @ult_25_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_25_v8i32: +; BITALG_NOVLX-LABEL: ult_21_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9477,11 +7359,11 @@ define <8 x i32> @ult_25_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_25_v8i32: +; BITALG-LABEL: ult_21_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9495,13 +7377,13 @@ define <8 x i32> @ult_25_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_25_v8i32: +define <8 x i32> @ugt_21_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_21_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9529,13 +7411,13 @@ define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25,25,25] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21,21,21] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_25_v8i32: +; AVX2-LABEL: ugt_21_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9551,19 +7433,19 @@ define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_25_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_21_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_25_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_21_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9571,7 +7453,7 @@ define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_25_v8i32: +; BITALG_NOVLX-LABEL: ugt_21_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9581,11 +7463,11 @@ define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [21,21,21,21,21,21,21,21] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_25_v8i32: +; BITALG-LABEL: ugt_21_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9599,13 +7481,13 @@ define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_26_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_26_v8i32: +define <8 x i32> @ult_22_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_22_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9633,13 +7515,13 @@ define <8 x i32> @ult_26_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26,26,26] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22,22,22] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_26_v8i32: +; AVX2-LABEL: ult_22_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9655,19 +7537,19 @@ define <8 x i32> @ult_26_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_26_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_22_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_26_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_22_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9675,7 +7557,7 @@ define <8 x i32> @ult_26_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_26_v8i32: +; BITALG_NOVLX-LABEL: ult_22_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9685,11 +7567,11 @@ define <8 x i32> @ult_26_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_26_v8i32: +; BITALG-LABEL: ult_22_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9703,13 +7585,13 @@ define <8 x i32> @ult_26_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_26_v8i32: +define <8 x i32> @ugt_22_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_22_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9737,13 +7619,13 @@ define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26,26,26] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22,22,22] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_26_v8i32: +; AVX2-LABEL: ugt_22_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9759,19 +7641,19 @@ define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_26_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_22_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_26_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_22_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9779,7 +7661,7 @@ define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_26_v8i32: +; BITALG_NOVLX-LABEL: ugt_22_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9789,11 +7671,11 @@ define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [22,22,22,22,22,22,22,22] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_26_v8i32: +; BITALG-LABEL: ugt_22_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9807,13 +7689,13 @@ define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_27_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_27_v8i32: +define <8 x i32> @ult_23_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_23_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9841,13 +7723,13 @@ define <8 x i32> @ult_27_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27,27,27] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23,23,23] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_27_v8i32: +; AVX2-LABEL: ult_23_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9863,19 +7745,19 @@ define <8 x i32> @ult_27_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_27_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_23_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_27_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_23_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9883,7 +7765,7 @@ define <8 x i32> @ult_27_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_27_v8i32: +; BITALG_NOVLX-LABEL: ult_23_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9893,11 +7775,11 @@ define <8 x i32> @ult_27_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_27_v8i32: +; BITALG-LABEL: ult_23_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9911,13 +7793,13 @@ define <8 x i32> @ult_27_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_27_v8i32: +define <8 x i32> @ugt_23_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_23_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -9945,13 +7827,13 @@ define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27,27,27] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23,23,23] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_27_v8i32: +; AVX2-LABEL: ugt_23_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -9967,19 +7849,19 @@ define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_27_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_23_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_27_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_23_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -9987,7 +7869,7 @@ define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_27_v8i32: +; BITALG_NOVLX-LABEL: ugt_23_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -9997,11 +7879,11 @@ define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [23,23,23,23,23,23,23,23] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_27_v8i32: +; BITALG-LABEL: ugt_23_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10015,13 +7897,13 @@ define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_28_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_28_v8i32: +define <8 x i32> @ult_24_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_24_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10049,13 +7931,13 @@ define <8 x i32> @ult_28_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28,28,28] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24,24,24] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_28_v8i32: +; AVX2-LABEL: ult_24_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10071,19 +7953,19 @@ define <8 x i32> @ult_28_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_28_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_24_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_28_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_24_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10091,7 +7973,7 @@ define <8 x i32> @ult_28_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_28_v8i32: +; BITALG_NOVLX-LABEL: ult_24_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10101,11 +7983,11 @@ define <8 x i32> @ult_28_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_28_v8i32: +; BITALG-LABEL: ult_24_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10119,13 +8001,13 @@ define <8 x i32> @ult_28_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_28_v8i32: +define <8 x i32> @ugt_24_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_24_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10153,13 +8035,13 @@ define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28,28,28] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24,24,24] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_28_v8i32: +; AVX2-LABEL: ugt_24_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10175,19 +8057,19 @@ define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_28_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_24_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_28_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_24_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10195,7 +8077,7 @@ define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_28_v8i32: +; BITALG_NOVLX-LABEL: ugt_24_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10205,11 +8087,11 @@ define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [24,24,24,24,24,24,24,24] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_28_v8i32: +; BITALG-LABEL: ugt_24_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10223,13 +8105,13 @@ define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_29_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_29_v8i32: +define <8 x i32> @ult_25_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_25_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10257,13 +8139,13 @@ define <8 x i32> @ult_29_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29,29,29] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25,25,25] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_29_v8i32: +; AVX2-LABEL: ult_25_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10279,19 +8161,19 @@ define <8 x i32> @ult_29_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_29_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_25_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_29_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_25_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10299,7 +8181,7 @@ define <8 x i32> @ult_29_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_29_v8i32: +; BITALG_NOVLX-LABEL: ult_25_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10309,11 +8191,11 @@ define <8 x i32> @ult_29_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_29_v8i32: +; BITALG-LABEL: ult_25_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10327,13 +8209,13 @@ define <8 x i32> @ult_29_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_29_v8i32: +define <8 x i32> @ugt_25_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_25_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10361,13 +8243,13 @@ define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29,29,29] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25,25,25] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_29_v8i32: +; AVX2-LABEL: ugt_25_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10383,19 +8265,19 @@ define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_29_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_25_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_29_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_25_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10403,7 +8285,7 @@ define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_29_v8i32: +; BITALG_NOVLX-LABEL: ugt_25_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10413,11 +8295,11 @@ define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [25,25,25,25,25,25,25,25] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_29_v8i32: +; BITALG-LABEL: ugt_25_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10431,13 +8313,13 @@ define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_30_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_30_v8i32: +define <8 x i32> @ult_26_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_26_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10465,13 +8347,13 @@ define <8 x i32> @ult_30_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30,30,30] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26,26,26] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_30_v8i32: +; AVX2-LABEL: ult_26_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10487,19 +8369,19 @@ define <8 x i32> @ult_30_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_30_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_26_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_30_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_26_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10507,7 +8389,7 @@ define <8 x i32> @ult_30_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_30_v8i32: +; BITALG_NOVLX-LABEL: ult_26_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10517,11 +8399,11 @@ define <8 x i32> @ult_30_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_30_v8i32: +; BITALG-LABEL: ult_26_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10535,13 +8417,13 @@ define <8 x i32> @ult_30_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_30_v8i32: +define <8 x i32> @ugt_26_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_26_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10569,13 +8451,13 @@ define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30,30,30] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26,26,26] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_30_v8i32: +; AVX2-LABEL: ugt_26_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10591,19 +8473,19 @@ define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_30_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_26_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_30_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_26_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10611,7 +8493,7 @@ define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_30_v8i32: +; BITALG_NOVLX-LABEL: ugt_26_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10621,11 +8503,11 @@ define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [26,26,26,26,26,26,26,26] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_30_v8i32: +; BITALG-LABEL: ugt_26_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10639,13 +8521,13 @@ define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_31_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_31_v8i32: +define <8 x i32> @ult_27_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_27_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10673,13 +8555,13 @@ define <8 x i32> @ult_31_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31,31,31] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27,27,27] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_31_v8i32: +; AVX2-LABEL: ult_27_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10695,19 +8577,19 @@ define <8 x i32> @ult_31_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_31_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_27_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_31_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_27_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10715,7 +8597,7 @@ define <8 x i32> @ult_31_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_31_v8i32: +; BITALG_NOVLX-LABEL: ult_27_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10725,11 +8607,11 @@ define <8 x i32> @ult_31_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_31_v8i32: +; BITALG-LABEL: ult_27_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10743,13 +8625,13 @@ define <8 x i32> @ult_31_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_31_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_31_v8i32: +define <8 x i32> @ugt_27_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_27_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10777,13 +8659,13 @@ define <8 x i32> @ugt_31_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31,31,31] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27,27,27] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_31_v8i32: +; AVX2-LABEL: ugt_27_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10799,19 +8681,19 @@ define <8 x i32> @ugt_31_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_31_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_27_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_31_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_27_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10819,7 +8701,7 @@ define <8 x i32> @ugt_31_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_31_v8i32: +; BITALG_NOVLX-LABEL: ugt_27_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10829,11 +8711,11 @@ define <8 x i32> @ugt_31_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [27,27,27,27,27,27,27,27] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_31_v8i32: +; BITALG-LABEL: ugt_27_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10847,13 +8729,13 @@ define <8 x i32> @ugt_31_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_32_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_32_v8i32: +define <8 x i32> @ult_28_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_28_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10881,13 +8763,13 @@ define <8 x i32> @ult_32_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32,32,32] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28,28,28] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_32_v8i32: +; AVX2-LABEL: ult_28_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -10903,19 +8785,19 @@ define <8 x i32> @ult_32_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_32_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_28_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_32_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_28_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -10923,7 +8805,7 @@ define <8 x i32> @ult_32_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_32_v8i32: +; BITALG_NOVLX-LABEL: ult_28_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -10933,11 +8815,11 @@ define <8 x i32> @ult_32_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_32_v8i32: +; BITALG-LABEL: ult_28_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -10951,13 +8833,13 @@ define <8 x i32> @ult_32_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ugt_32_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ugt_32_v8i32: +define <8 x i32> @ugt_28_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_28_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -10985,13 +8867,13 @@ define <8 x i32> @ugt_32_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32,32,32] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28,28,28] ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_32_v8i32: +; AVX2-LABEL: ugt_28_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -11007,19 +8889,19 @@ define <8 x i32> @ugt_32_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_32_v8i32: +; AVX512VPOPCNTDQ-LABEL: ugt_28_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_32_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ugt_28_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -11027,7 +8909,7 @@ define <8 x i32> @ugt_32_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_32_v8i32: +; BITALG_NOVLX-LABEL: ugt_28_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -11037,11 +8919,11 @@ define <8 x i32> @ugt_32_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [28,28,28,28,28,28,28,28] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_32_v8i32: +; BITALG-LABEL: ugt_28_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11055,13 +8937,13 @@ define <8 x i32> @ugt_32_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ugt <8 x i32> %2, + %3 = icmp ugt <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <8 x i32> @ult_33_v8i32(<8 x i32> %0) { -; AVX1-LABEL: ult_33_v8i32: +define <8 x i32> @ult_29_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_29_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11089,13 +8971,13 @@ define <8 x i32> @ult_33_v8i32(<8 x i32> %0) { ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33,33,33] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29,29,29] ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_33_v8i32: +; AVX2-LABEL: ult_29_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -11111,19 +8993,19 @@ define <8 x i32> @ult_33_v8i32(<8 x i32> %0) { ; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [33,33,33,33,33,33,33,33] +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_33_v8i32: +; AVX512VPOPCNTDQ-LABEL: ult_29_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [33,33,33,33,33,33,33,33] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_33_v8i32: +; AVX512VPOPCNTDQVL-LABEL: ult_29_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 @@ -11131,7 +9013,7 @@ define <8 x i32> @ult_33_v8i32(<8 x i32> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_33_v8i32: +; BITALG_NOVLX-LABEL: ult_29_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 @@ -11141,11 +9023,11 @@ define <8 x i32> @ult_33_v8i32(<8 x i32> %0) { ; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [33,33,33,33,33,33,33,33] +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] ; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_33_v8i32: +; BITALG-LABEL: ult_29_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11159,113 +9041,13 @@ define <8 x i32> @ult_33_v8i32(<8 x i32> %0) { ; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) - %3 = icmp ult <8 x i32> %2, + %3 = icmp ult <8 x i32> %2, %4 = sext <8 x i1> %3 to <8 x i32> ret <8 x i32> %4 } -define <4 x i64> @ult_0_v4i64(<4 x i64> %0) { -; ALL-LABEL: ult_0_v4i64: -; ALL: # %bb.0: -; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; ALL-NEXT: retq - %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, - %4 = sext <4 x i1> %3 to <4 x i64> - ret <4 x i64> %4 -} - -define <4 x i64> @ugt_0_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_0_v4i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, 
%ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqq %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqq %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpxor %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_0_v4i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v4i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_0_v4i64: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_0_v4i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_0_v4i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, - %4 = sext <4 x i1> %3 to <4 x i64> - ret <4 x i64> %4 -} - -define <4 x i64> @ult_1_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_1_v4i64: +define <8 x i32> @ugt_29_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_29_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11276,7 +9058,11 @@ define <4 x i64> @ult_1_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 ; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = 
xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 ; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 @@ -11284,13 +9070,18 @@ define <4 x i64> @ult_1_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqq %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqq %xmm4, %xmm2, %xmm1 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29,29,29] +; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_1_v4i64: +; AVX2-LABEL: ugt_29_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -11301,181 +9092,170 @@ define <4 x i64> @ult_1_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_1_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_29_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_1_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_29_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_1_v4i64: +; BITALG_NOVLX-LABEL: ugt_29_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; 
BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [29,29,29,29,29,29,29,29] +; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_1_v4i64: +; BITALG-LABEL: ugt_29_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: retq - %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, - %4 = sext <4 x i1> %3 to <4 x i64> - ret <4 x i64> %4 -} - -define <4 x i64> @ugt_1_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_1_v4i64: -; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_1_v4i64: -; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_1_v4i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NEXT: retq -; -; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i64: -; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 -; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; AVX512VPOPCNTDQVL-NEXT: retq -; -; BITALG_NOVLX-LABEL: ugt_1_v4i64: -; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; BITALG_NOVLX-NEXT: vpaddq %ymm1, %ymm0, %ymm1 -; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; BITALG_NOVLX-NEXT: retq -; -; BITALG-LABEL: ugt_1_v4i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; BITALG-NEXT: vpaddq %ymm1, %ymm0, %ymm1 -; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 +; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; BITALG-NEXT: 
vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 +; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, - %4 = sext <4 x i1> %3 to <4 x i64> - ret <4 x i64> %4 + %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) + %3 = icmp ugt <8 x i32> %2, + %4 = sext <8 x i1> %3 to <8 x i32> + ret <8 x i32> %4 } -define <4 x i64> @ult_2_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_2_v4i64: +define <8 x i32> @ult_30_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_30_v8i32: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 +; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 +; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 +; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero +; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 +; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 +; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30,30,30] +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_2_v4i64: +; AVX2-LABEL: ult_30_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 +; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackuswb 
%ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_2_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_30_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_30_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_2_v4i64: +; BITALG_NOVLX-LABEL: ult_30_v8i32: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; BITALG_NOVLX-NEXT: vpaddq %ymm1, %ymm0, %ymm1 -; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_2_v4i64: +; BITALG-LABEL: ult_30_v8i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; BITALG-NEXT: vpaddq %ymm1, %ymm0, %ymm1 -; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0 +; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 +; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 +; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, - %4 = sext <4 x i1> %3 to <4 x i64> - ret <4 x i64> %4 + %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) + %3 = icmp ult <8 x i32> %2, + %4 = sext <8 x i1> %3 to <8 x i32> + ret <8 x i32> %4 } -define <4 x i64> @ugt_2_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_2_v4i64: +define <8 x i32> @ugt_30_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ugt_30_v8i32: ; AVX1: # %bb.0: 
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11486,7 +9266,11 @@ define <4 x i64> @ugt_2_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 ; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 ; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 @@ -11494,14 +9278,18 @@ define <4 x i64> @ugt_2_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2] -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30,30,30] +; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_2_v4i64: +; AVX2-LABEL: ugt_30_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -11512,54 +9300,66 @@ define <4 x i64> @ugt_2_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2] -; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_2_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_30_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_30_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 
%ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_2_v4i64: +; BITALG_NOVLX-LABEL: ugt_30_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2] -; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [30,30,30,30,30,30,30,30] +; BITALG_NOVLX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_2_v4i64: +; BITALG-LABEL: ugt_30_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 +; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to8}, %ymm0, %k1 ; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, - %4 = sext <4 x i1> %3 to <4 x i64> - ret <4 x i64> %4 + %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) + %3 = icmp ugt <8 x i32> %2, + %4 = sext <8 x i1> %3 to <8 x i32> + ret <8 x i32> %4 } -define <4 x i64> @ult_3_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_3_v4i64: +define <8 x i32> @ult_31_v8i32(<8 x i32> %0) { +; AVX1-LABEL: ult_31_v8i32: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11570,7 +9370,11 @@ define <4 x i64> @ult_3_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 ; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; AVX1-NEXT: vpsadbw %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 ; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 @@ -11578,14 +9382,18 @@ define <4 x i64> @ult_3_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; AVX1-NEXT: vpsadbw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31,31,31] +; AVX1-NEXT: 
vpcmpgtd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_3_v4i64: +; AVX2-LABEL: ult_31_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -11596,104 +9404,100 @@ define <4 x i64> @ult_3_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] -; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_3_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_31_v8i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] -; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_31_v8i32: ; AVX512VPOPCNTDQVL: # %bb.0: -; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 +; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0 +; AVX512VPOPCNTDQVL-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512VPOPCNTDQVL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_3_v4i64: +; BITALG_NOVLX-LABEL: ult_31_v8i32: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] -; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 +; BITALG_NOVLX-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [31,31,31,31,31,31,31,31] +; BITALG_NOVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_3_v4i64: +; BITALG-LABEL: ult_31_v8i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2 +; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 +; BITALG-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1 ; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; BITALG-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, - %4 = sext <4 x i1> %3 to <4 x i64> - ret <4 x i64> %4 + %2 = tail call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %0) + %3 = icmp ult <8 x i32> %2, + %4 = sext <8 x i1> %3 to <8 x i32> + ret <8 x i32> %4 } -define <4 x i64> @ugt_3_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_3_v4i64: +define <4 x i64> @ugt_1_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_1_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm4 +; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_3_v4i64: +; AVX2-LABEL: ugt_1_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] -; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_3_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_1_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def 
$ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -11701,83 +9505,65 @@ define <4 x i64> @ugt_3_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_3_v4i64: +; BITALG_NOVLX-LABEL: ugt_1_v4i64: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; BITALG_NOVLX-NEXT: vpaddq %ymm1, %ymm0, %ymm1 +; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] -; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_3_v4i64: +; BITALG-LABEL: ugt_1_v4i64: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 +; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; BITALG-NEXT: vpaddq %ymm1, %ymm0, %ymm1 +; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; BITALG-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; BITALG-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_4_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_4_v4i64: +define <4 x i64> @ult_2_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_2_v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm4 -; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpsadbw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm5 -; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm5 -; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm3 +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; 
AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_4_v4i64: +; AVX2-LABEL: ult_2_v4i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm1 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] -; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_4_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_2_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -11785,33 +9571,31 @@ define <4 x i64> @ult_4_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_4_v4i64: +; BITALG_NOVLX-LABEL: ult_2_v4i64: ; BITALG_NOVLX: # %bb.0: -; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 +; BITALG_NOVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; BITALG_NOVLX-NEXT: vpaddq %ymm1, %ymm0, %ymm1 +; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] -; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 +; BITALG_NOVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_4_v4i64: +; BITALG-LABEL: ult_2_v4i64: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %ymm0, %ymm0 +; BITALG-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; BITALG-NEXT: vpaddq %ymm1, %ymm0, %ymm1 +; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 -; BITALG-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; BITALG-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_4_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_4_v4i64: +define <4 x i64> @ugt_2_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_2_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11831,13 +9615,13 @@ define <4 x i64> @ugt_4_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2,2] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_4_v4i64: +; AVX2-LABEL: ugt_2_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -11849,19 +9633,19 @@ define <4 x i64> @ugt_4_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_4_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_2_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -11869,17 +9653,17 @@ define <4 x i64> @ugt_4_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_4_v4i64: +; BITALG_NOVLX-LABEL: ugt_2_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_4_v4i64: +; BITALG-LABEL: ugt_2_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11889,13 +9673,13 @@ define <4 x i64> @ugt_4_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_5_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_5_v4i64: +define <4 x i64> @ult_3_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_3_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11915,13 +9699,13 @@ define <4 x i64> @ult_5_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq 
%xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_5_v4i64: +; AVX2-LABEL: ult_3_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -11933,19 +9717,19 @@ define <4 x i64> @ult_5_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_5_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_3_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -11953,17 +9737,17 @@ define <4 x i64> @ult_5_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_5_v4i64: +; BITALG_NOVLX-LABEL: ult_3_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_5_v4i64: +; BITALG-LABEL: ult_3_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -11973,13 +9757,13 @@ define <4 x i64> @ult_5_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_5_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_5_v4i64: +define <4 x i64> @ugt_3_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_3_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -11999,13 +9783,13 @@ define <4 x i64> @ugt_5_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3,3] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_5_v4i64: +; AVX2-LABEL: ugt_3_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12017,19 +9801,19 @@ define <4 x i64> @ugt_5_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, 
%ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_5_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_3_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12037,17 +9821,17 @@ define <4 x i64> @ugt_5_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_5_v4i64: +; BITALG_NOVLX-LABEL: ugt_3_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [3,3,3,3] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_5_v4i64: +; BITALG-LABEL: ugt_3_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12057,13 +9841,13 @@ define <4 x i64> @ugt_5_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_6_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_6_v4i64: +define <4 x i64> @ult_4_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_4_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12083,13 +9867,13 @@ define <4 x i64> @ult_6_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_6_v4i64: +; AVX2-LABEL: ult_4_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12101,19 +9885,19 @@ define <4 x i64> @ult_6_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_6_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_4_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; 
AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12121,17 +9905,17 @@ define <4 x i64> @ult_6_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_6_v4i64: +; BITALG_NOVLX-LABEL: ult_4_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_6_v4i64: +; BITALG-LABEL: ult_4_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12141,13 +9925,13 @@ define <4 x i64> @ult_6_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_6_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_6_v4i64: +define <4 x i64> @ugt_4_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_4_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12167,13 +9951,13 @@ define <4 x i64> @ugt_6_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_6_v4i64: +; AVX2-LABEL: ugt_4_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12185,19 +9969,19 @@ define <4 x i64> @ugt_6_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_6_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_4_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; 
AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12205,17 +9989,17 @@ define <4 x i64> @ugt_6_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_6_v4i64: +; BITALG_NOVLX-LABEL: ugt_4_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4,4,4,4] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_6_v4i64: +; BITALG-LABEL: ugt_4_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12225,13 +10009,13 @@ define <4 x i64> @ugt_6_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_7_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_7_v4i64: +define <4 x i64> @ult_5_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_5_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12251,13 +10035,13 @@ define <4 x i64> @ult_7_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_7_v4i64: +; AVX2-LABEL: ult_5_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12269,19 +10053,19 @@ define <4 x i64> @ult_7_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_7_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_5_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12289,17 +10073,17 @@ define <4 x i64> @ult_7_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_7_v4i64: +; BITALG_NOVLX-LABEL: ult_5_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; 
BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_7_v4i64: +; BITALG-LABEL: ult_5_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12309,13 +10093,13 @@ define <4 x i64> @ult_7_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_7_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_7_v4i64: +define <4 x i64> @ugt_5_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_5_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12335,13 +10119,13 @@ define <4 x i64> @ugt_7_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [5,5] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_7_v4i64: +; AVX2-LABEL: ugt_5_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12353,19 +10137,19 @@ define <4 x i64> @ugt_7_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_5_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12373,17 +10157,17 @@ define <4 x i64> @ugt_7_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_7_v4i64: +; BITALG_NOVLX-LABEL: ugt_5_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,5,5,5] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_7_v4i64: +; BITALG-LABEL: ugt_5_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 @@ -12393,13 +10177,13 @@ define <4 x i64> @ugt_7_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_8_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_8_v4i64: +define <4 x i64> @ult_6_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_6_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12419,13 +10203,13 @@ define <4 x i64> @ult_8_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_8_v4i64: +; AVX2-LABEL: ult_6_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12437,19 +10221,19 @@ define <4 x i64> @ult_8_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_6_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12457,17 +10241,17 @@ define <4 x i64> @ult_8_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_8_v4i64: +; BITALG_NOVLX-LABEL: ult_6_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_8_v4i64: +; BITALG-LABEL: ult_6_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12477,13 +10261,13 @@ define <4 x i64> @ult_8_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_8_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_8_v4i64: +define <4 x i64> 
@ugt_6_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_6_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12503,13 +10287,13 @@ define <4 x i64> @ugt_8_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [6,6] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_8_v4i64: +; AVX2-LABEL: ugt_6_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12521,19 +10305,19 @@ define <4 x i64> @ugt_8_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_6_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12541,17 +10325,17 @@ define <4 x i64> @ugt_8_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_8_v4i64: +; BITALG_NOVLX-LABEL: ugt_6_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [6,6,6,6] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_8_v4i64: +; BITALG-LABEL: ugt_6_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12561,13 +10345,13 @@ define <4 x i64> @ugt_8_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_9_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_9_v4i64: +define <4 x i64> @ult_7_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_7_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12587,13 +10371,13 @@ define <4 x i64> @ult_9_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_9_v4i64: +; AVX2-LABEL: ult_7_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12605,19 +10389,19 @@ define <4 x i64> @ult_9_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_7_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12625,17 +10409,17 @@ define <4 x i64> @ult_9_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_9_v4i64: +; BITALG_NOVLX-LABEL: ult_7_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_9_v4i64: +; BITALG-LABEL: ult_7_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12645,13 +10429,13 @@ define <4 x i64> @ult_9_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_9_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_9_v4i64: +define <4 x i64> @ugt_7_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_7_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12671,13 +10455,13 @@ define <4 x i64> @ugt_9_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_9_v4i64: +; AVX2-LABEL: ugt_7_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand 
%ymm1, %ymm0, %ymm2 @@ -12689,19 +10473,19 @@ define <4 x i64> @ugt_9_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_9_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_7_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12709,17 +10493,17 @@ define <4 x i64> @ugt_9_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_9_v4i64: +; BITALG_NOVLX-LABEL: ugt_7_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7,7,7,7] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_9_v4i64: +; BITALG-LABEL: ugt_7_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12729,13 +10513,13 @@ define <4 x i64> @ugt_9_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_10_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_10_v4i64: +define <4 x i64> @ult_8_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_8_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12755,13 +10539,13 @@ define <4 x i64> @ult_10_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_10_v4i64: +; AVX2-LABEL: ult_8_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12773,19 +10557,19 @@ define <4 x i64> @ult_10_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_10_v4i64: 
+; AVX512VPOPCNTDQ-LABEL: ult_8_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12793,17 +10577,17 @@ define <4 x i64> @ult_10_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_10_v4i64: +; BITALG_NOVLX-LABEL: ult_8_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_10_v4i64: +; BITALG-LABEL: ult_8_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12813,13 +10597,13 @@ define <4 x i64> @ult_10_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_10_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_10_v4i64: +define <4 x i64> @ugt_8_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_8_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12839,13 +10623,13 @@ define <4 x i64> @ugt_10_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_10_v4i64: +; AVX2-LABEL: ugt_8_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12857,19 +10641,19 @@ define <4 x i64> @ugt_10_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_10_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_8_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; 
AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12877,17 +10661,17 @@ define <4 x i64> @ugt_10_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_10_v4i64: +; BITALG_NOVLX-LABEL: ugt_8_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8,8,8,8] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_10_v4i64: +; BITALG-LABEL: ugt_8_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12897,13 +10681,13 @@ define <4 x i64> @ugt_10_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_11_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_11_v4i64: +define <4 x i64> @ult_9_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_9_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -12923,13 +10707,13 @@ define <4 x i64> @ult_11_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_11_v4i64: +; AVX2-LABEL: ult_9_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -12941,19 +10725,19 @@ define <4 x i64> @ult_11_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_11_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_9_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -12961,17 +10745,17 @@ define <4 x i64> @ult_11_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; 
BITALG_NOVLX-LABEL: ult_11_v4i64: +; BITALG_NOVLX-LABEL: ult_9_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_11_v4i64: +; BITALG-LABEL: ult_9_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -12981,13 +10765,13 @@ define <4 x i64> @ult_11_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_11_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_11_v4i64: +define <4 x i64> @ugt_9_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_9_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13007,13 +10791,13 @@ define <4 x i64> @ugt_11_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9,9] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_11_v4i64: +; AVX2-LABEL: ugt_9_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13025,19 +10809,19 @@ define <4 x i64> @ugt_11_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_11_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_9_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9,9,9,9] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13045,17 +10829,17 @@ define <4 x i64> @ugt_11_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_11_v4i64: +; BITALG_NOVLX-LABEL: ugt_9_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} 
ymm1 = [9,9,9,9] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_11_v4i64: +; BITALG-LABEL: ugt_9_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13065,13 +10849,13 @@ define <4 x i64> @ugt_11_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_12_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_12_v4i64: +define <4 x i64> @ult_10_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_10_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13091,13 +10875,13 @@ define <4 x i64> @ult_12_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_12_v4i64: +; AVX2-LABEL: ult_10_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13109,19 +10893,19 @@ define <4 x i64> @ult_12_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_12_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_10_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13129,17 +10913,17 @@ define <4 x i64> @ult_12_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_12_v4i64: +; BITALG_NOVLX-LABEL: ult_10_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_12_v4i64: +; BITALG-LABEL: ult_10_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13149,13 +10933,13 @@ define <4 x i64> @ult_12_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; 
BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_12_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_12_v4i64: +define <4 x i64> @ugt_10_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_10_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13175,13 +10959,13 @@ define <4 x i64> @ugt_12_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [10,10] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_12_v4i64: +; AVX2-LABEL: ugt_10_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13193,19 +10977,19 @@ define <4 x i64> @ugt_12_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_12_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_10_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13213,17 +10997,17 @@ define <4 x i64> @ugt_12_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_12_v4i64: +; BITALG_NOVLX-LABEL: ugt_10_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [10,10,10,10] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_12_v4i64: +; BITALG-LABEL: ugt_10_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13233,13 +11017,13 @@ define <4 x i64> @ugt_12_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_13_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_13_v4i64: +define <4 x i64> @ult_11_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_11_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa 
{{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13259,13 +11043,13 @@ define <4 x i64> @ult_13_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [11,11] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_13_v4i64: +; AVX2-LABEL: ult_11_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13277,19 +11061,19 @@ define <4 x i64> @ult_13_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_13_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_11_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13297,17 +11081,17 @@ define <4 x i64> @ult_13_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_13_v4i64: +; BITALG_NOVLX-LABEL: ult_11_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_13_v4i64: +; BITALG-LABEL: ult_11_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13317,13 +11101,13 @@ define <4 x i64> @ult_13_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_13_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_13_v4i64: +define <4 x i64> @ugt_11_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_11_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13343,13 +11127,13 @@ define <4 x i64> @ugt_13_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[11,11] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_13_v4i64: +; AVX2-LABEL: ugt_11_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13361,19 +11145,19 @@ define <4 x i64> @ugt_13_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_13_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_11_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13381,17 +11165,17 @@ define <4 x i64> @ugt_13_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_13_v4i64: +; BITALG_NOVLX-LABEL: ugt_11_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [11,11,11,11] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_13_v4i64: +; BITALG-LABEL: ugt_11_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13401,13 +11185,13 @@ define <4 x i64> @ugt_13_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_14_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_14_v4i64: +define <4 x i64> @ult_12_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_12_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13427,13 +11211,13 @@ define <4 x i64> @ult_14_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_14_v4i64: +; AVX2-LABEL: ult_12_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: 
vpand %ymm1, %ymm0, %ymm2 @@ -13445,19 +11229,19 @@ define <4 x i64> @ult_14_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_14_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_12_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13465,17 +11249,17 @@ define <4 x i64> @ult_14_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_14_v4i64: +; BITALG_NOVLX-LABEL: ult_12_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_14_v4i64: +; BITALG-LABEL: ult_12_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13485,13 +11269,13 @@ define <4 x i64> @ult_14_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_14_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_14_v4i64: +define <4 x i64> @ugt_12_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_12_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13511,13 +11295,13 @@ define <4 x i64> @ugt_14_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [12,12] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_14_v4i64: +; AVX2-LABEL: ugt_12_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13529,19 +11313,19 @@ define <4 x i64> @ugt_14_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: 
retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_14_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_12_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13549,17 +11333,17 @@ define <4 x i64> @ugt_14_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_14_v4i64: +; BITALG_NOVLX-LABEL: ugt_12_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [12,12,12,12] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_14_v4i64: +; BITALG-LABEL: ugt_12_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13569,13 +11353,13 @@ define <4 x i64> @ugt_14_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_15_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_15_v4i64: +define <4 x i64> @ult_13_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_13_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13595,13 +11379,13 @@ define <4 x i64> @ult_15_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_15_v4i64: +; AVX2-LABEL: ult_13_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13613,19 +11397,19 @@ define <4 x i64> @ult_15_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_15_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_13_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] ; AVX512VPOPCNTDQ-NEXT: 
vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13633,17 +11417,17 @@ define <4 x i64> @ult_15_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_15_v4i64: +; BITALG_NOVLX-LABEL: ult_13_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_15_v4i64: +; BITALG-LABEL: ult_13_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13653,13 +11437,13 @@ define <4 x i64> @ult_15_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_15_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_15_v4i64: +define <4 x i64> @ugt_13_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_13_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13679,13 +11463,13 @@ define <4 x i64> @ugt_15_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [13,13] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_15_v4i64: +; AVX2-LABEL: ugt_13_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13697,19 +11481,19 @@ define <4 x i64> @ugt_15_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_15_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_13_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13717,17 +11501,17 @@ define <4 x i64> @ugt_15_v4i64(<4 x i64> %0) { ; 
AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_15_v4i64: +; BITALG_NOVLX-LABEL: ugt_13_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [13,13,13,13] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_15_v4i64: +; BITALG-LABEL: ugt_13_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13737,13 +11521,13 @@ define <4 x i64> @ugt_15_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_16_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_16_v4i64: +define <4 x i64> @ult_14_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_14_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13763,13 +11547,13 @@ define <4 x i64> @ult_16_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_16_v4i64: +; AVX2-LABEL: ult_14_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13781,19 +11565,19 @@ define <4 x i64> @ult_16_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_16_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_14_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13801,17 +11585,17 @@ define <4 x i64> @ult_16_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_16_v4i64: +; BITALG_NOVLX-LABEL: ult_14_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, 
%ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_16_v4i64: +; BITALG-LABEL: ult_14_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13821,13 +11605,13 @@ define <4 x i64> @ult_16_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_16_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_16_v4i64: +define <4 x i64> @ugt_14_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_14_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13847,13 +11631,13 @@ define <4 x i64> @ugt_16_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [14,14] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_16_v4i64: +; AVX2-LABEL: ugt_14_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13865,19 +11649,19 @@ define <4 x i64> @ugt_16_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_16_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_14_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13885,17 +11669,17 @@ define <4 x i64> @ugt_16_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_16_v4i64: +; BITALG_NOVLX-LABEL: ugt_14_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [14,14,14,14] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_16_v4i64: +; BITALG-LABEL: ugt_14_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13905,13 
+11689,13 @@ define <4 x i64> @ugt_16_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_17_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_17_v4i64: +define <4 x i64> @ult_15_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_15_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -13931,13 +11715,13 @@ define <4 x i64> @ult_17_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_17_v4i64: +; AVX2-LABEL: ult_15_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -13949,19 +11733,19 @@ define <4 x i64> @ult_17_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_17_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_15_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -13969,17 +11753,17 @@ define <4 x i64> @ult_17_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_17_v4i64: +; BITALG_NOVLX-LABEL: ult_15_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_17_v4i64: +; BITALG-LABEL: ult_15_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -13989,13 +11773,13 @@ define <4 x i64> @ult_17_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_17_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_17_v4i64: +define <4 
x i64> @ugt_15_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_15_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14015,13 +11799,13 @@ define <4 x i64> @ugt_17_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_17_v4i64: +; AVX2-LABEL: ugt_15_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14033,19 +11817,19 @@ define <4 x i64> @ugt_17_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_17_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_15_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14053,17 +11837,17 @@ define <4 x i64> @ugt_17_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_17_v4i64: +; BITALG_NOVLX-LABEL: ugt_15_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [15,15,15,15] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_17_v4i64: +; BITALG-LABEL: ugt_15_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14073,13 +11857,13 @@ define <4 x i64> @ugt_17_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_18_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_18_v4i64: +define <4 x i64> @ult_16_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_16_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14099,13 +11883,13 @@ define <4 x i64> @ult_18_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw 
%xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_18_v4i64: +; AVX2-LABEL: ult_16_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14117,19 +11901,19 @@ define <4 x i64> @ult_18_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_18_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_16_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14137,17 +11921,17 @@ define <4 x i64> @ult_18_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_18_v4i64: +; BITALG_NOVLX-LABEL: ult_16_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_18_v4i64: +; BITALG-LABEL: ult_16_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14157,13 +11941,13 @@ define <4 x i64> @ult_18_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_18_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_18_v4i64: +define <4 x i64> @ugt_16_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_16_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14183,13 +11967,13 @@ define <4 x i64> @ugt_18_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_18_v4i64: +; AVX2-LABEL: ugt_16_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14201,19 +11985,19 @@ define <4 x i64> @ugt_18_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_18_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_16_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14221,17 +12005,17 @@ define <4 x i64> @ugt_18_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_18_v4i64: +; BITALG_NOVLX-LABEL: ugt_16_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [16,16,16,16] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_18_v4i64: +; BITALG-LABEL: ugt_16_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14241,13 +12025,13 @@ define <4 x i64> @ugt_18_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_19_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_19_v4i64: +define <4 x i64> @ult_17_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_17_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14267,13 +12051,13 @@ define <4 x i64> @ult_19_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_19_v4i64: +; AVX2-LABEL: ult_17_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14285,19 +12069,19 @@ define <4 x i64> @ult_19_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] 
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_19_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_17_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14305,17 +12089,17 @@ define <4 x i64> @ult_19_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_19_v4i64: +; BITALG_NOVLX-LABEL: ult_17_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_19_v4i64: +; BITALG-LABEL: ult_17_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14325,13 +12109,13 @@ define <4 x i64> @ult_19_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_19_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_19_v4i64: +define <4 x i64> @ugt_17_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_17_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14351,13 +12135,13 @@ define <4 x i64> @ugt_19_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [17,17] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_19_v4i64: +; AVX2-LABEL: ugt_17_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14369,19 +12153,19 @@ define <4 x i64> @ugt_19_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_19_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_17_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} 
ymm1 = [19,19,19,19] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14389,17 +12173,17 @@ define <4 x i64> @ugt_19_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_19_v4i64: +; BITALG_NOVLX-LABEL: ugt_17_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17,17,17,17] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_19_v4i64: +; BITALG-LABEL: ugt_17_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14409,13 +12193,13 @@ define <4 x i64> @ugt_19_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_20_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_20_v4i64: +define <4 x i64> @ult_18_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_18_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14435,13 +12219,13 @@ define <4 x i64> @ult_20_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_20_v4i64: +; AVX2-LABEL: ult_18_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14453,19 +12237,19 @@ define <4 x i64> @ult_20_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_20_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_18_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq 
{{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14473,17 +12257,17 @@ define <4 x i64> @ult_20_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_20_v4i64: +; BITALG_NOVLX-LABEL: ult_18_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_20_v4i64: +; BITALG-LABEL: ult_18_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14493,13 +12277,13 @@ define <4 x i64> @ult_20_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_20_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_20_v4i64: +define <4 x i64> @ugt_18_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_18_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14519,13 +12303,13 @@ define <4 x i64> @ugt_20_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [18,18] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_20_v4i64: +; AVX2-LABEL: ugt_18_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14537,19 +12321,19 @@ define <4 x i64> @ugt_20_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_20_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_18_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14557,17 +12341,17 @@ define <4 x i64> @ugt_20_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_20_v4i64: +; BITALG_NOVLX-LABEL: ugt_18_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb 
%zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18,18,18,18] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_20_v4i64: +; BITALG-LABEL: ugt_18_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14577,13 +12361,13 @@ define <4 x i64> @ugt_20_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_21_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_21_v4i64: +define <4 x i64> @ult_19_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_19_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14603,13 +12387,13 @@ define <4 x i64> @ult_21_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_21_v4i64: +; AVX2-LABEL: ult_19_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14621,19 +12405,19 @@ define <4 x i64> @ult_21_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_21_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_19_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14641,17 +12425,17 @@ define <4 x i64> @ult_21_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_21_v4i64: +; BITALG_NOVLX-LABEL: ult_19_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_21_v4i64: +; BITALG-LABEL: ult_19_v4i64: ; 
BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14661,13 +12445,13 @@ define <4 x i64> @ult_21_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_21_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_21_v4i64: +define <4 x i64> @ugt_19_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_19_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14687,13 +12471,13 @@ define <4 x i64> @ugt_21_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [19,19] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_21_v4i64: +; AVX2-LABEL: ugt_19_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14705,19 +12489,19 @@ define <4 x i64> @ugt_21_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_21_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_19_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14725,17 +12509,17 @@ define <4 x i64> @ugt_21_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_21_v4i64: +; BITALG_NOVLX-LABEL: ugt_19_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [19,19,19,19] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_21_v4i64: +; BITALG-LABEL: ugt_19_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14745,13 +12529,13 @@ define <4 x i64> @ugt_21_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x 
i64> ret <4 x i64> %4 } -define <4 x i64> @ult_22_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_22_v4i64: +define <4 x i64> @ult_20_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_20_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14771,13 +12555,13 @@ define <4 x i64> @ult_22_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_22_v4i64: +; AVX2-LABEL: ult_20_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14789,19 +12573,19 @@ define <4 x i64> @ult_22_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_22_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_20_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14809,17 +12593,17 @@ define <4 x i64> @ult_22_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_22_v4i64: +; BITALG_NOVLX-LABEL: ult_20_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_22_v4i64: +; BITALG-LABEL: ult_20_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14829,13 +12613,13 @@ define <4 x i64> @ult_22_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_22_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_22_v4i64: +define <4 x i64> @ugt_20_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_20_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14855,13 +12639,13 @@ define <4 x i64> @ugt_22_v4i64(<4 x i64> 
%0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [20,20] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_22_v4i64: +; AVX2-LABEL: ugt_20_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14873,19 +12657,19 @@ define <4 x i64> @ugt_22_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_22_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_20_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14893,17 +12677,17 @@ define <4 x i64> @ugt_22_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_22_v4i64: +; BITALG_NOVLX-LABEL: ugt_20_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [20,20,20,20] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_22_v4i64: +; BITALG-LABEL: ugt_20_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14913,13 +12697,13 @@ define <4 x i64> @ugt_22_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_23_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_23_v4i64: +define <4 x i64> @ult_21_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_21_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -14939,13 +12723,13 @@ define <4 x i64> @ult_23_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; 
AVX2-LABEL: ult_23_v4i64: +; AVX2-LABEL: ult_21_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -14957,19 +12741,19 @@ define <4 x i64> @ult_23_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_23_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_21_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -14977,17 +12761,17 @@ define <4 x i64> @ult_23_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_23_v4i64: +; BITALG_NOVLX-LABEL: ult_21_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_23_v4i64: +; BITALG-LABEL: ult_21_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -14997,13 +12781,13 @@ define <4 x i64> @ult_23_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_23_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_23_v4i64: +define <4 x i64> @ugt_21_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_21_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15023,13 +12807,13 @@ define <4 x i64> @ugt_23_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [21,21] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_23_v4i64: +; AVX2-LABEL: ugt_21_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15041,19 +12825,19 @@ define <4 x i64> @ugt_23_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_23_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_21_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15061,17 +12845,17 @@ define <4 x i64> @ugt_23_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_23_v4i64: +; BITALG_NOVLX-LABEL: ugt_21_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [21,21,21,21] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_23_v4i64: +; BITALG-LABEL: ugt_21_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15081,13 +12865,13 @@ define <4 x i64> @ugt_23_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_24_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_24_v4i64: +define <4 x i64> @ult_22_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_22_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15107,13 +12891,13 @@ define <4 x i64> @ult_24_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_24_v4i64: +; AVX2-LABEL: ult_22_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15125,19 +12909,19 @@ define <4 x i64> @ult_24_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_24_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_22_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed 
$ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15145,17 +12929,17 @@ define <4 x i64> @ult_24_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_24_v4i64: +; BITALG_NOVLX-LABEL: ult_22_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_24_v4i64: +; BITALG-LABEL: ult_22_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15165,13 +12949,13 @@ define <4 x i64> @ult_24_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_24_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_24_v4i64: +define <4 x i64> @ugt_22_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_22_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15191,13 +12975,13 @@ define <4 x i64> @ugt_24_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [22,22] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_24_v4i64: +; AVX2-LABEL: ugt_22_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15209,19 +12993,19 @@ define <4 x i64> @ugt_24_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_24_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_22_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i64: ; AVX512VPOPCNTDQVL: 
# %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15229,17 +13013,17 @@ define <4 x i64> @ugt_24_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_24_v4i64: +; BITALG_NOVLX-LABEL: ugt_22_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [22,22,22,22] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_24_v4i64: +; BITALG-LABEL: ugt_22_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15249,13 +13033,13 @@ define <4 x i64> @ugt_24_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_25_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_25_v4i64: +define <4 x i64> @ult_23_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_23_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15275,13 +13059,13 @@ define <4 x i64> @ult_25_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_25_v4i64: +; AVX2-LABEL: ult_23_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15293,19 +13077,19 @@ define <4 x i64> @ult_25_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_25_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_23_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15313,17 +13097,17 @@ define <4 x i64> @ult_25_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_25_v4i64: +; BITALG_NOVLX-LABEL: ult_23_v4i64: ; BITALG_NOVLX: # 
%bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_25_v4i64: +; BITALG-LABEL: ult_23_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15333,13 +13117,13 @@ define <4 x i64> @ult_25_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_25_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_25_v4i64: +define <4 x i64> @ugt_23_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_23_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15359,13 +13143,13 @@ define <4 x i64> @ugt_25_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [23,23] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_25_v4i64: +; AVX2-LABEL: ugt_23_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15377,19 +13161,19 @@ define <4 x i64> @ugt_25_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_25_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_23_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15397,17 +13181,17 @@ define <4 x i64> @ugt_25_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_25_v4i64: +; BITALG_NOVLX-LABEL: ugt_23_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [23,23,23,23] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, 
%ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_25_v4i64: +; BITALG-LABEL: ugt_23_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15417,13 +13201,13 @@ define <4 x i64> @ugt_25_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_26_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_26_v4i64: +define <4 x i64> @ult_24_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_24_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15443,13 +13227,13 @@ define <4 x i64> @ult_26_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_26_v4i64: +; AVX2-LABEL: ult_24_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15461,19 +13245,19 @@ define <4 x i64> @ult_26_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_26_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_24_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15481,17 +13265,17 @@ define <4 x i64> @ult_26_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_26_v4i64: +; BITALG_NOVLX-LABEL: ult_24_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_26_v4i64: +; BITALG-LABEL: ult_24_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15501,13 +13285,13 @@ define <4 x i64> @ult_26_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> 
%0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_26_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_26_v4i64: +define <4 x i64> @ugt_24_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_24_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15527,13 +13311,13 @@ define <4 x i64> @ugt_26_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [24,24] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_26_v4i64: +; AVX2-LABEL: ugt_24_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15545,19 +13329,19 @@ define <4 x i64> @ugt_26_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_26_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_24_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15565,17 +13349,17 @@ define <4 x i64> @ugt_26_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_26_v4i64: +; BITALG_NOVLX-LABEL: ugt_24_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [24,24,24,24] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_26_v4i64: +; BITALG-LABEL: ugt_24_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15585,13 +13369,13 @@ define <4 x i64> @ugt_26_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_27_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_27_v4i64: +define <4 x i64> @ult_25_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_25_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15611,13 +13395,13 @@ define <4 x i64> @ult_27_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_27_v4i64: +; AVX2-LABEL: ult_25_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15629,19 +13413,19 @@ define <4 x i64> @ult_27_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_27_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_25_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15649,17 +13433,17 @@ define <4 x i64> @ult_27_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_27_v4i64: +; BITALG_NOVLX-LABEL: ult_25_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_27_v4i64: +; BITALG-LABEL: ult_25_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15669,13 +13453,13 @@ define <4 x i64> @ult_27_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_27_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_27_v4i64: +define <4 x i64> @ugt_25_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_25_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15695,13 +13479,13 @@ define <4 x i64> @ugt_27_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [25,25] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq 
%xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_27_v4i64: +; AVX2-LABEL: ugt_25_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15713,19 +13497,19 @@ define <4 x i64> @ugt_27_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_27_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_25_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15733,17 +13517,17 @@ define <4 x i64> @ugt_27_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_27_v4i64: +; BITALG_NOVLX-LABEL: ugt_25_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25,25,25,25] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_27_v4i64: +; BITALG-LABEL: ugt_25_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15753,13 +13537,13 @@ define <4 x i64> @ugt_27_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_28_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_28_v4i64: +define <4 x i64> @ult_26_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_26_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15779,13 +13563,13 @@ define <4 x i64> @ult_28_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_28_v4i64: +; AVX2-LABEL: ult_26_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15797,19 +13581,19 @@ define <4 x i64> 
@ult_28_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_28_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_26_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15817,17 +13601,17 @@ define <4 x i64> @ult_28_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_28_v4i64: +; BITALG_NOVLX-LABEL: ult_26_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_28_v4i64: +; BITALG-LABEL: ult_26_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15837,13 +13621,13 @@ define <4 x i64> @ult_28_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_28_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_28_v4i64: +define <4 x i64> @ugt_26_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_26_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15863,13 +13647,13 @@ define <4 x i64> @ugt_28_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [26,26] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_28_v4i64: +; AVX2-LABEL: ugt_26_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15881,19 +13665,19 @@ define <4 x i64> @ugt_28_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_28_v4i64: +; 
AVX512VPOPCNTDQ-LABEL: ugt_26_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15901,17 +13685,17 @@ define <4 x i64> @ugt_28_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_28_v4i64: +; BITALG_NOVLX-LABEL: ugt_26_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [26,26,26,26] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_28_v4i64: +; BITALG-LABEL: ugt_26_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -15921,13 +13705,13 @@ define <4 x i64> @ugt_28_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_29_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_29_v4i64: +define <4 x i64> @ult_27_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_27_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -15947,13 +13731,13 @@ define <4 x i64> @ult_29_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_29_v4i64: +; AVX2-LABEL: ult_27_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -15965,19 +13749,19 @@ define <4 x i64> @ult_29_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_29_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_27_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; 
AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -15985,17 +13769,17 @@ define <4 x i64> @ult_29_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_29_v4i64: +; BITALG_NOVLX-LABEL: ult_27_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_29_v4i64: +; BITALG-LABEL: ult_27_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16005,13 +13789,13 @@ define <4 x i64> @ult_29_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_29_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_29_v4i64: +define <4 x i64> @ugt_27_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_27_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16031,13 +13815,13 @@ define <4 x i64> @ugt_29_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [27,27] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_29_v4i64: +; AVX2-LABEL: ugt_27_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16049,19 +13833,19 @@ define <4 x i64> @ugt_29_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_29_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_27_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16069,17 +13853,17 @@ define <4 x i64> @ugt_29_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} 
{z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_29_v4i64: +; BITALG_NOVLX-LABEL: ugt_27_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [27,27,27,27] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_29_v4i64: +; BITALG-LABEL: ugt_27_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16089,13 +13873,13 @@ define <4 x i64> @ugt_29_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_30_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_30_v4i64: +define <4 x i64> @ult_28_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_28_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16115,13 +13899,13 @@ define <4 x i64> @ult_30_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_30_v4i64: +; AVX2-LABEL: ult_28_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16133,19 +13917,19 @@ define <4 x i64> @ult_30_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_30_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_28_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16153,17 +13937,17 @@ define <4 x i64> @ult_30_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_30_v4i64: +; BITALG_NOVLX-LABEL: ult_28_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[30,30,30,30] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_30_v4i64: +; BITALG-LABEL: ult_28_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16173,13 +13957,13 @@ define <4 x i64> @ult_30_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_30_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_30_v4i64: +define <4 x i64> @ugt_28_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_28_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16199,13 +13983,13 @@ define <4 x i64> @ugt_30_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [28,28] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_30_v4i64: +; AVX2-LABEL: ugt_28_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16217,19 +14001,19 @@ define <4 x i64> @ugt_30_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_30_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_28_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16237,17 +14021,17 @@ define <4 x i64> @ugt_30_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_30_v4i64: +; BITALG_NOVLX-LABEL: ugt_28_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [28,28,28,28] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_30_v4i64: +; BITALG-LABEL: ugt_28_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16257,13 +14041,13 @@ define <4 x i64> @ugt_30_v4i64(<4 x i64> %0) 
{ ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_31_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_31_v4i64: +define <4 x i64> @ult_29_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_29_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16283,13 +14067,13 @@ define <4 x i64> @ult_31_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_31_v4i64: +; AVX2-LABEL: ult_29_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16301,19 +14085,19 @@ define <4 x i64> @ult_31_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_31_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_29_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16321,17 +14105,17 @@ define <4 x i64> @ult_31_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_31_v4i64: +; BITALG_NOVLX-LABEL: ult_29_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_31_v4i64: +; BITALG-LABEL: ult_29_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16341,13 +14125,13 @@ define <4 x i64> @ult_31_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_31_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_31_v4i64: +define <4 x i64> @ugt_29_v4i64(<4 x i64> %0) { +; AVX1-LABEL: 
ugt_29_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16367,13 +14151,13 @@ define <4 x i64> @ugt_31_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [29,29] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_31_v4i64: +; AVX2-LABEL: ugt_29_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16385,19 +14169,19 @@ define <4 x i64> @ugt_31_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_31_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_29_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_31_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16405,17 +14189,17 @@ define <4 x i64> @ugt_31_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_31_v4i64: +; BITALG_NOVLX-LABEL: ugt_29_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [29,29,29,29] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_31_v4i64: +; BITALG-LABEL: ugt_29_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16425,13 +14209,13 @@ define <4 x i64> @ugt_31_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_32_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_32_v4i64: +define <4 x i64> @ult_30_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_30_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16451,13 +14235,13 @@ define <4 x i64> @ult_32_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} 
xmm1 = [32,32] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_32_v4i64: +; AVX2-LABEL: ult_30_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16469,19 +14253,19 @@ define <4 x i64> @ult_32_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_32_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_30_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_32_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16489,17 +14273,17 @@ define <4 x i64> @ult_32_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_32_v4i64: +; BITALG_NOVLX-LABEL: ult_30_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_32_v4i64: +; BITALG-LABEL: ult_30_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16509,13 +14293,13 @@ define <4 x i64> @ult_32_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_32_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_32_v4i64: +define <4 x i64> @ugt_30_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_30_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16535,13 +14319,13 @@ define <4 x i64> @ugt_32_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [30,30] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_32_v4i64: +; AVX2-LABEL: ugt_30_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16553,19 +14337,19 @@ define <4 x i64> @ugt_32_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_32_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_30_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_32_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16573,17 +14357,17 @@ define <4 x i64> @ugt_32_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_32_v4i64: +; BITALG_NOVLX-LABEL: ugt_30_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [30,30,30,30] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_32_v4i64: +; BITALG-LABEL: ugt_30_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16593,13 +14377,13 @@ define <4 x i64> @ugt_32_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_33_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_33_v4i64: +define <4 x i64> @ult_31_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_31_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16619,13 +14403,13 @@ define <4 x i64> @ult_33_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_33_v4i64: +; AVX2-LABEL: ult_31_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16637,19 +14421,19 @@ define <4 x i64> @ult_33_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] 
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_33_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_31_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_33_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16657,17 +14441,17 @@ define <4 x i64> @ult_33_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_33_v4i64: +; BITALG_NOVLX-LABEL: ult_31_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_33_v4i64: +; BITALG-LABEL: ult_31_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16677,13 +14461,13 @@ define <4 x i64> @ult_33_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_33_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_33_v4i64: +define <4 x i64> @ugt_31_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_31_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16703,13 +14487,13 @@ define <4 x i64> @ugt_33_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [31,31] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_33_v4i64: +; AVX2-LABEL: ugt_31_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16721,19 +14505,19 @@ define <4 x i64> @ugt_33_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_33_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_31_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} 
ymm1 = [33,33,33,33] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_33_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_31_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16741,17 +14525,17 @@ define <4 x i64> @ugt_33_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_33_v4i64: +; BITALG_NOVLX-LABEL: ugt_31_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [31,31,31,31] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_33_v4i64: +; BITALG-LABEL: ugt_31_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16761,13 +14545,13 @@ define <4 x i64> @ugt_33_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_34_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_34_v4i64: +define <4 x i64> @ult_32_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_32_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16787,13 +14571,13 @@ define <4 x i64> @ult_34_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_34_v4i64: +; AVX2-LABEL: ult_32_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16805,19 +14589,19 @@ define <4 x i64> @ult_34_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_34_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_32_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_34_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_32_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq 
{{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16825,17 +14609,17 @@ define <4 x i64> @ult_34_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_34_v4i64: +; BITALG_NOVLX-LABEL: ult_32_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_34_v4i64: +; BITALG-LABEL: ult_32_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16845,13 +14629,13 @@ define <4 x i64> @ult_34_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_34_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_34_v4i64: +define <4 x i64> @ugt_32_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_32_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16871,13 +14655,13 @@ define <4 x i64> @ugt_34_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32,32] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_34_v4i64: +; AVX2-LABEL: ugt_32_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16889,19 +14673,19 @@ define <4 x i64> @ugt_34_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_34_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_32_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_34_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_32_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16909,17 +14693,17 @@ define <4 x i64> @ugt_34_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_34_v4i64: +; BITALG_NOVLX-LABEL: ugt_32_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb 
%zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32,32,32,32] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_34_v4i64: +; BITALG-LABEL: ugt_32_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -16929,13 +14713,13 @@ define <4 x i64> @ugt_34_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_35_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_35_v4i64: +define <4 x i64> @ult_33_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_33_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -16955,13 +14739,13 @@ define <4 x i64> @ult_35_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_35_v4i64: +; AVX2-LABEL: ult_33_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -16973,19 +14757,19 @@ define <4 x i64> @ult_35_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_35_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_33_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_35_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_33_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -16993,17 +14777,17 @@ define <4 x i64> @ult_35_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_35_v4i64: +; BITALG_NOVLX-LABEL: ult_33_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_35_v4i64: +; BITALG-LABEL: ult_33_v4i64: ; 
BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17013,13 +14797,13 @@ define <4 x i64> @ult_35_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_35_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_35_v4i64: +define <4 x i64> @ugt_33_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_33_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17039,13 +14823,13 @@ define <4 x i64> @ugt_35_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [33,33] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_35_v4i64: +; AVX2-LABEL: ugt_33_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17057,19 +14841,19 @@ define <4 x i64> @ugt_35_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_35_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_33_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_35_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_33_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17077,17 +14861,17 @@ define <4 x i64> @ugt_35_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_35_v4i64: +; BITALG_NOVLX-LABEL: ugt_33_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [33,33,33,33] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_35_v4i64: +; BITALG-LABEL: ugt_33_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17097,13 +14881,13 @@ define <4 x i64> @ugt_35_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x 
i64> ret <4 x i64> %4 } -define <4 x i64> @ult_36_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_36_v4i64: +define <4 x i64> @ult_34_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_34_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17123,13 +14907,13 @@ define <4 x i64> @ult_36_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_36_v4i64: +; AVX2-LABEL: ult_34_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17141,19 +14925,19 @@ define <4 x i64> @ult_36_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_36_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_34_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_36_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_34_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17161,17 +14945,17 @@ define <4 x i64> @ult_36_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_36_v4i64: +; BITALG_NOVLX-LABEL: ult_34_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_36_v4i64: +; BITALG-LABEL: ult_34_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17181,13 +14965,13 @@ define <4 x i64> @ult_36_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_36_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_36_v4i64: +define <4 x i64> @ugt_34_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_34_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17207,13 +14991,13 @@ define <4 x i64> @ugt_36_v4i64(<4 x i64> 
%0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [34,34] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_36_v4i64: +; AVX2-LABEL: ugt_34_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17225,19 +15009,19 @@ define <4 x i64> @ugt_36_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_36_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_34_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_36_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_34_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17245,17 +15029,17 @@ define <4 x i64> @ugt_36_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_36_v4i64: +; BITALG_NOVLX-LABEL: ugt_34_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [34,34,34,34] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_36_v4i64: +; BITALG-LABEL: ugt_34_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17265,13 +15049,13 @@ define <4 x i64> @ugt_36_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_37_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_37_v4i64: +define <4 x i64> @ult_35_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_35_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17291,13 +15075,13 @@ define <4 x i64> @ult_37_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; 
AVX2-LABEL: ult_37_v4i64: +; AVX2-LABEL: ult_35_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17309,19 +15093,19 @@ define <4 x i64> @ult_37_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_37_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_35_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_37_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_35_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17329,17 +15113,17 @@ define <4 x i64> @ult_37_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_37_v4i64: +; BITALG_NOVLX-LABEL: ult_35_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_37_v4i64: +; BITALG-LABEL: ult_35_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17349,13 +15133,13 @@ define <4 x i64> @ult_37_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_37_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_37_v4i64: +define <4 x i64> @ugt_35_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_35_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17375,13 +15159,13 @@ define <4 x i64> @ugt_37_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [35,35] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_37_v4i64: +; AVX2-LABEL: ugt_35_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17393,19 +15177,19 @@ define <4 x i64> @ugt_37_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_37_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_35_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_37_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_35_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17413,17 +15197,17 @@ define <4 x i64> @ugt_37_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_37_v4i64: +; BITALG_NOVLX-LABEL: ugt_35_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [35,35,35,35] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_37_v4i64: +; BITALG-LABEL: ugt_35_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17433,13 +15217,13 @@ define <4 x i64> @ugt_37_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_38_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_38_v4i64: +define <4 x i64> @ult_36_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_36_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17459,13 +15243,13 @@ define <4 x i64> @ult_38_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_38_v4i64: +; AVX2-LABEL: ult_36_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17477,19 +15261,19 @@ define <4 x i64> @ult_38_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_38_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_36_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed 
$ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_38_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_36_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17497,17 +15281,17 @@ define <4 x i64> @ult_38_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_38_v4i64: +; BITALG_NOVLX-LABEL: ult_36_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_38_v4i64: +; BITALG-LABEL: ult_36_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17517,13 +15301,13 @@ define <4 x i64> @ult_38_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_38_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_38_v4i64: +define <4 x i64> @ugt_36_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_36_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17543,13 +15327,13 @@ define <4 x i64> @ugt_38_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [36,36] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_38_v4i64: +; AVX2-LABEL: ugt_36_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17561,19 +15345,19 @@ define <4 x i64> @ugt_38_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_38_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_36_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_38_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_36_v4i64: ; AVX512VPOPCNTDQVL: 
# %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17581,17 +15365,17 @@ define <4 x i64> @ugt_38_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_38_v4i64: +; BITALG_NOVLX-LABEL: ugt_36_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [36,36,36,36] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_38_v4i64: +; BITALG-LABEL: ugt_36_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17601,13 +15385,13 @@ define <4 x i64> @ugt_38_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_39_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_39_v4i64: +define <4 x i64> @ult_37_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_37_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17627,13 +15411,13 @@ define <4 x i64> @ult_39_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_39_v4i64: +; AVX2-LABEL: ult_37_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17645,19 +15429,19 @@ define <4 x i64> @ult_39_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_39_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_37_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_39_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_37_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17665,17 +15449,17 @@ define <4 x i64> @ult_39_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_39_v4i64: +; BITALG_NOVLX-LABEL: ult_37_v4i64: ; BITALG_NOVLX: # 
%bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_39_v4i64: +; BITALG-LABEL: ult_37_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17685,13 +15469,13 @@ define <4 x i64> @ult_39_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_39_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_39_v4i64: +define <4 x i64> @ugt_37_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_37_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17711,13 +15495,13 @@ define <4 x i64> @ugt_39_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [37,37] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_39_v4i64: +; AVX2-LABEL: ugt_37_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17729,19 +15513,19 @@ define <4 x i64> @ugt_39_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_39_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_37_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_39_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_37_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17749,17 +15533,17 @@ define <4 x i64> @ugt_39_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_39_v4i64: +; BITALG_NOVLX-LABEL: ugt_37_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [37,37,37,37] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, 
%ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_39_v4i64: +; BITALG-LABEL: ugt_37_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17769,13 +15553,13 @@ define <4 x i64> @ugt_39_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_40_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_40_v4i64: +define <4 x i64> @ult_38_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_38_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17795,13 +15579,13 @@ define <4 x i64> @ult_40_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_40_v4i64: +; AVX2-LABEL: ult_38_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17813,19 +15597,19 @@ define <4 x i64> @ult_40_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_40_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_38_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_40_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_38_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17833,17 +15617,17 @@ define <4 x i64> @ult_40_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_40_v4i64: +; BITALG_NOVLX-LABEL: ult_38_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_40_v4i64: +; BITALG-LABEL: ult_38_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17853,13 +15637,13 @@ define <4 x i64> @ult_40_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> 
%0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_40_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_40_v4i64: +define <4 x i64> @ugt_38_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_38_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17879,13 +15663,13 @@ define <4 x i64> @ugt_40_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [38,38] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_40_v4i64: +; AVX2-LABEL: ugt_38_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17897,19 +15681,19 @@ define <4 x i64> @ugt_40_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_40_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_38_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_40_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_38_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -17917,17 +15701,17 @@ define <4 x i64> @ugt_40_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_40_v4i64: +; BITALG_NOVLX-LABEL: ugt_38_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [38,38,38,38] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_40_v4i64: +; BITALG-LABEL: ugt_38_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -17937,13 +15721,13 @@ define <4 x i64> @ugt_40_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_41_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_41_v4i64: +define <4 x i64> @ult_39_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_39_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -17963,13 +15747,13 @@ define <4 x i64> @ult_41_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_41_v4i64: +; AVX2-LABEL: ult_39_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -17981,19 +15765,19 @@ define <4 x i64> @ult_41_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_41_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_39_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_41_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_39_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18001,17 +15785,17 @@ define <4 x i64> @ult_41_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_41_v4i64: +; BITALG_NOVLX-LABEL: ult_39_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_41_v4i64: +; BITALG-LABEL: ult_39_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18021,13 +15805,13 @@ define <4 x i64> @ult_41_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_41_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_41_v4i64: +define <4 x i64> @ugt_39_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_39_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18047,13 +15831,13 @@ define <4 x i64> @ugt_41_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [39,39] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq 
%xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_41_v4i64: +; AVX2-LABEL: ugt_39_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18065,19 +15849,19 @@ define <4 x i64> @ugt_41_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_41_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_39_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_41_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_39_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18085,17 +15869,17 @@ define <4 x i64> @ugt_41_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_41_v4i64: +; BITALG_NOVLX-LABEL: ugt_39_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [39,39,39,39] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_41_v4i64: +; BITALG-LABEL: ugt_39_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18105,13 +15889,13 @@ define <4 x i64> @ugt_41_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_42_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_42_v4i64: +define <4 x i64> @ult_40_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_40_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18131,13 +15915,13 @@ define <4 x i64> @ult_42_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_42_v4i64: +; AVX2-LABEL: ult_40_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18149,19 +15933,19 @@ define <4 x i64> 
@ult_42_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_42_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_40_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_42_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_40_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18169,17 +15953,17 @@ define <4 x i64> @ult_42_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_42_v4i64: +; BITALG_NOVLX-LABEL: ult_40_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_42_v4i64: +; BITALG-LABEL: ult_40_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18189,13 +15973,13 @@ define <4 x i64> @ult_42_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_42_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_42_v4i64: +define <4 x i64> @ugt_40_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_40_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18215,13 +15999,13 @@ define <4 x i64> @ugt_42_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [40,40] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_42_v4i64: +; AVX2-LABEL: ugt_40_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18233,19 +16017,19 @@ define <4 x i64> @ugt_42_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_42_v4i64: +; 
AVX512VPOPCNTDQ-LABEL: ugt_40_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_42_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_40_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18253,17 +16037,17 @@ define <4 x i64> @ugt_42_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_42_v4i64: +; BITALG_NOVLX-LABEL: ugt_40_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [40,40,40,40] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_42_v4i64: +; BITALG-LABEL: ugt_40_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18273,13 +16057,13 @@ define <4 x i64> @ugt_42_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_43_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_43_v4i64: +define <4 x i64> @ult_41_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_41_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18299,13 +16083,13 @@ define <4 x i64> @ult_43_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_43_v4i64: +; AVX2-LABEL: ult_41_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18317,19 +16101,19 @@ define <4 x i64> @ult_43_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_43_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_41_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; 
AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_43_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_41_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18337,17 +16121,17 @@ define <4 x i64> @ult_43_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_43_v4i64: +; BITALG_NOVLX-LABEL: ult_41_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_43_v4i64: +; BITALG-LABEL: ult_41_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18357,13 +16141,13 @@ define <4 x i64> @ult_43_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_43_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_43_v4i64: +define <4 x i64> @ugt_41_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_41_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18383,13 +16167,13 @@ define <4 x i64> @ugt_43_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [41,41] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_43_v4i64: +; AVX2-LABEL: ugt_41_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18401,19 +16185,19 @@ define <4 x i64> @ugt_43_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_43_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_41_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_43_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_41_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18421,17 +16205,17 @@ define <4 x i64> @ugt_43_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} 
{z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_43_v4i64: +; BITALG_NOVLX-LABEL: ugt_41_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [41,41,41,41] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_43_v4i64: +; BITALG-LABEL: ugt_41_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18441,13 +16225,13 @@ define <4 x i64> @ugt_43_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_44_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_44_v4i64: +define <4 x i64> @ult_42_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_42_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18467,13 +16251,13 @@ define <4 x i64> @ult_44_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_44_v4i64: +; AVX2-LABEL: ult_42_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18485,19 +16269,19 @@ define <4 x i64> @ult_44_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_44_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_42_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_44_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_42_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18505,17 +16289,17 @@ define <4 x i64> @ult_44_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_44_v4i64: +; BITALG_NOVLX-LABEL: ult_42_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[44,44,44,44] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_44_v4i64: +; BITALG-LABEL: ult_42_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18525,13 +16309,13 @@ define <4 x i64> @ult_44_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_44_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_44_v4i64: +define <4 x i64> @ugt_42_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_42_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18551,13 +16335,13 @@ define <4 x i64> @ugt_44_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_44_v4i64: +; AVX2-LABEL: ugt_42_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18569,19 +16353,19 @@ define <4 x i64> @ugt_44_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_44_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_42_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_44_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_42_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18589,17 +16373,17 @@ define <4 x i64> @ugt_44_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_44_v4i64: +; BITALG_NOVLX-LABEL: ugt_42_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [42,42,42,42] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_44_v4i64: +; BITALG-LABEL: ugt_42_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18609,13 +16393,13 @@ define <4 x i64> @ugt_44_v4i64(<4 x i64> %0) 
{ ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_45_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_45_v4i64: +define <4 x i64> @ult_43_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_43_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18635,13 +16419,13 @@ define <4 x i64> @ult_45_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_45_v4i64: +; AVX2-LABEL: ult_43_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18653,19 +16437,19 @@ define <4 x i64> @ult_45_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_45_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_43_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_45_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_43_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18673,17 +16457,17 @@ define <4 x i64> @ult_45_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_45_v4i64: +; BITALG_NOVLX-LABEL: ult_43_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_45_v4i64: +; BITALG-LABEL: ult_43_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18693,13 +16477,13 @@ define <4 x i64> @ult_45_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_45_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_45_v4i64: +define <4 x i64> @ugt_43_v4i64(<4 x i64> %0) { +; AVX1-LABEL: 
ugt_43_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18719,13 +16503,13 @@ define <4 x i64> @ugt_45_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [43,43] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_45_v4i64: +; AVX2-LABEL: ugt_43_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18737,19 +16521,19 @@ define <4 x i64> @ugt_45_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_45_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_43_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_45_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_43_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18757,17 +16541,17 @@ define <4 x i64> @ugt_45_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_45_v4i64: +; BITALG_NOVLX-LABEL: ugt_43_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [43,43,43,43] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_45_v4i64: +; BITALG-LABEL: ugt_43_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18777,13 +16561,13 @@ define <4 x i64> @ugt_45_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_46_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_46_v4i64: +define <4 x i64> @ult_44_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_44_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18803,13 +16587,13 @@ define <4 x i64> @ult_46_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} 
xmm1 = [46,46] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_46_v4i64: +; AVX2-LABEL: ult_44_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18821,19 +16605,19 @@ define <4 x i64> @ult_46_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_46_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_44_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_46_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_44_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18841,17 +16625,17 @@ define <4 x i64> @ult_46_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_46_v4i64: +; BITALG_NOVLX-LABEL: ult_44_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_46_v4i64: +; BITALG-LABEL: ult_44_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18861,13 +16645,13 @@ define <4 x i64> @ult_46_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_46_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_46_v4i64: +define <4 x i64> @ugt_44_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_44_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18887,13 +16671,13 @@ define <4 x i64> @ugt_46_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [44,44] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_46_v4i64: +; AVX2-LABEL: ugt_44_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18905,19 +16689,19 @@ define <4 x i64> @ugt_46_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_46_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_44_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_46_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_44_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -18925,17 +16709,17 @@ define <4 x i64> @ugt_46_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_46_v4i64: +; BITALG_NOVLX-LABEL: ugt_44_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [44,44,44,44] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_46_v4i64: +; BITALG-LABEL: ugt_44_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -18945,13 +16729,13 @@ define <4 x i64> @ugt_46_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_47_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_47_v4i64: +define <4 x i64> @ult_45_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_45_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -18971,13 +16755,13 @@ define <4 x i64> @ult_47_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_47_v4i64: +; AVX2-LABEL: ult_45_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -18989,19 +16773,19 @@ define <4 x i64> @ult_47_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] 
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_47_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_45_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_47_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_45_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19009,17 +16793,17 @@ define <4 x i64> @ult_47_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_47_v4i64: +; BITALG_NOVLX-LABEL: ult_45_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_47_v4i64: +; BITALG-LABEL: ult_45_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19029,13 +16813,13 @@ define <4 x i64> @ult_47_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_47_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_47_v4i64: +define <4 x i64> @ugt_45_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_45_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19055,13 +16839,13 @@ define <4 x i64> @ugt_47_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [45,45] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_47_v4i64: +; AVX2-LABEL: ugt_45_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19073,19 +16857,19 @@ define <4 x i64> @ugt_47_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_47_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_45_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} 
ymm1 = [47,47,47,47] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_47_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_45_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19093,17 +16877,17 @@ define <4 x i64> @ugt_47_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_47_v4i64: +; BITALG_NOVLX-LABEL: ugt_45_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [45,45,45,45] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_47_v4i64: +; BITALG-LABEL: ugt_45_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19113,13 +16897,13 @@ define <4 x i64> @ugt_47_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_48_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_48_v4i64: +define <4 x i64> @ult_46_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_46_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19139,13 +16923,13 @@ define <4 x i64> @ult_48_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_48_v4i64: +; AVX2-LABEL: ult_46_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19157,19 +16941,19 @@ define <4 x i64> @ult_48_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_48_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_46_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_48_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_46_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq 
{{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19177,17 +16961,17 @@ define <4 x i64> @ult_48_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_48_v4i64: +; BITALG_NOVLX-LABEL: ult_46_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_48_v4i64: +; BITALG-LABEL: ult_46_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19197,13 +16981,13 @@ define <4 x i64> @ult_48_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_48_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_48_v4i64: +define <4 x i64> @ugt_46_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_46_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19223,13 +17007,13 @@ define <4 x i64> @ugt_48_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [46,46] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_48_v4i64: +; AVX2-LABEL: ugt_46_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19241,19 +17025,19 @@ define <4 x i64> @ugt_48_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_48_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_46_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_48_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_46_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19261,17 +17045,17 @@ define <4 x i64> @ugt_48_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_48_v4i64: +; BITALG_NOVLX-LABEL: ugt_46_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb 
%zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [46,46,46,46] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_48_v4i64: +; BITALG-LABEL: ugt_46_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19281,13 +17065,13 @@ define <4 x i64> @ugt_48_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_49_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_49_v4i64: +define <4 x i64> @ult_47_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_47_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19307,13 +17091,13 @@ define <4 x i64> @ult_49_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_49_v4i64: +; AVX2-LABEL: ult_47_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19325,19 +17109,19 @@ define <4 x i64> @ult_49_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_49_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_47_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_49_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_47_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19345,17 +17129,17 @@ define <4 x i64> @ult_49_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_49_v4i64: +; BITALG_NOVLX-LABEL: ult_47_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_49_v4i64: +; BITALG-LABEL: ult_47_v4i64: ; 
BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19365,13 +17149,13 @@ define <4 x i64> @ult_49_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_49_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_49_v4i64: +define <4 x i64> @ugt_47_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_47_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19391,13 +17175,13 @@ define <4 x i64> @ugt_49_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [47,47] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_49_v4i64: +; AVX2-LABEL: ugt_47_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19409,19 +17193,19 @@ define <4 x i64> @ugt_49_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_49_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_47_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_49_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_47_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19429,17 +17213,17 @@ define <4 x i64> @ugt_49_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_49_v4i64: +; BITALG_NOVLX-LABEL: ugt_47_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [47,47,47,47] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_49_v4i64: +; BITALG-LABEL: ugt_47_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19449,13 +17233,13 @@ define <4 x i64> @ugt_49_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x 
i64> ret <4 x i64> %4 } -define <4 x i64> @ult_50_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_50_v4i64: +define <4 x i64> @ult_48_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_48_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19475,13 +17259,13 @@ define <4 x i64> @ult_50_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_50_v4i64: +; AVX2-LABEL: ult_48_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19493,19 +17277,19 @@ define <4 x i64> @ult_50_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_50_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_48_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_50_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_48_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19513,17 +17297,17 @@ define <4 x i64> @ult_50_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_50_v4i64: +; BITALG_NOVLX-LABEL: ult_48_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_50_v4i64: +; BITALG-LABEL: ult_48_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19533,13 +17317,13 @@ define <4 x i64> @ult_50_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_50_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_50_v4i64: +define <4 x i64> @ugt_48_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_48_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19559,13 +17343,13 @@ define <4 x i64> @ugt_50_v4i64(<4 x i64> 
%0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [48,48] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_50_v4i64: +; AVX2-LABEL: ugt_48_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19577,19 +17361,19 @@ define <4 x i64> @ugt_50_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_50_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_48_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_50_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_48_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19597,17 +17381,17 @@ define <4 x i64> @ugt_50_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_50_v4i64: +; BITALG_NOVLX-LABEL: ugt_48_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [48,48,48,48] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_50_v4i64: +; BITALG-LABEL: ugt_48_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19617,13 +17401,13 @@ define <4 x i64> @ugt_50_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_51_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_51_v4i64: +define <4 x i64> @ult_49_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_49_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19643,13 +17427,13 @@ define <4 x i64> @ult_51_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; 
AVX2-LABEL: ult_51_v4i64: +; AVX2-LABEL: ult_49_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19661,19 +17445,19 @@ define <4 x i64> @ult_51_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_51_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_49_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_51_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_49_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19681,17 +17465,17 @@ define <4 x i64> @ult_51_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_51_v4i64: +; BITALG_NOVLX-LABEL: ult_49_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_51_v4i64: +; BITALG-LABEL: ult_49_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19701,13 +17485,13 @@ define <4 x i64> @ult_51_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_51_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_51_v4i64: +define <4 x i64> @ugt_49_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_49_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19727,13 +17511,13 @@ define <4 x i64> @ugt_51_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [49,49] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_51_v4i64: +; AVX2-LABEL: ugt_49_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19745,19 +17529,19 @@ define <4 x i64> @ugt_51_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_51_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_49_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_51_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_49_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19765,17 +17549,17 @@ define <4 x i64> @ugt_51_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_51_v4i64: +; BITALG_NOVLX-LABEL: ugt_49_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [49,49,49,49] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_51_v4i64: +; BITALG-LABEL: ugt_49_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19785,13 +17569,13 @@ define <4 x i64> @ugt_51_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_52_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_52_v4i64: +define <4 x i64> @ult_50_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_50_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19811,13 +17595,13 @@ define <4 x i64> @ult_52_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_52_v4i64: +; AVX2-LABEL: ult_50_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19829,19 +17613,19 @@ define <4 x i64> @ult_52_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_52_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_50_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed 
$ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_52_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_50_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19849,17 +17633,17 @@ define <4 x i64> @ult_52_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_52_v4i64: +; BITALG_NOVLX-LABEL: ult_50_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_52_v4i64: +; BITALG-LABEL: ult_50_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19869,13 +17653,13 @@ define <4 x i64> @ult_52_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_52_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_52_v4i64: +define <4 x i64> @ugt_50_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_50_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19895,13 +17679,13 @@ define <4 x i64> @ugt_52_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [50,50] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_52_v4i64: +; AVX2-LABEL: ugt_50_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19913,19 +17697,19 @@ define <4 x i64> @ugt_52_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_52_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_50_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_52_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_50_v4i64: ; AVX512VPOPCNTDQVL: 
# %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -19933,17 +17717,17 @@ define <4 x i64> @ugt_52_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_52_v4i64: +; BITALG_NOVLX-LABEL: ugt_50_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [50,50,50,50] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_52_v4i64: +; BITALG-LABEL: ugt_50_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -19953,13 +17737,13 @@ define <4 x i64> @ugt_52_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_53_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_53_v4i64: +define <4 x i64> @ult_51_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_51_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -19979,13 +17763,13 @@ define <4 x i64> @ult_53_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_53_v4i64: +; AVX2-LABEL: ult_51_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -19997,19 +17781,19 @@ define <4 x i64> @ult_53_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_53_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_51_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_53_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_51_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20017,17 +17801,17 @@ define <4 x i64> @ult_53_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_53_v4i64: +; BITALG_NOVLX-LABEL: ult_51_v4i64: ; BITALG_NOVLX: # 
%bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_53_v4i64: +; BITALG-LABEL: ult_51_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20037,13 +17821,13 @@ define <4 x i64> @ult_53_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_53_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_53_v4i64: +define <4 x i64> @ugt_51_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_51_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20063,13 +17847,13 @@ define <4 x i64> @ugt_53_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [51,51] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_53_v4i64: +; AVX2-LABEL: ugt_51_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20081,19 +17865,19 @@ define <4 x i64> @ugt_53_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_53_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_51_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_53_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_51_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20101,17 +17885,17 @@ define <4 x i64> @ugt_53_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_53_v4i64: +; BITALG_NOVLX-LABEL: ugt_51_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [51,51,51,51] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, 
%ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_53_v4i64: +; BITALG-LABEL: ugt_51_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20121,13 +17905,13 @@ define <4 x i64> @ugt_53_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_54_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_54_v4i64: +define <4 x i64> @ult_52_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_52_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20147,13 +17931,13 @@ define <4 x i64> @ult_54_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_54_v4i64: +; AVX2-LABEL: ult_52_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20165,19 +17949,19 @@ define <4 x i64> @ult_54_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_54_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_52_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_54_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_52_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20185,17 +17969,17 @@ define <4 x i64> @ult_54_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_54_v4i64: +; BITALG_NOVLX-LABEL: ult_52_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_54_v4i64: +; BITALG-LABEL: ult_52_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20205,13 +17989,13 @@ define <4 x i64> @ult_54_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> 
%0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_54_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_54_v4i64: +define <4 x i64> @ugt_52_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_52_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20231,13 +18015,13 @@ define <4 x i64> @ugt_54_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [52,52] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_54_v4i64: +; AVX2-LABEL: ugt_52_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20249,19 +18033,19 @@ define <4 x i64> @ugt_54_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_54_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_52_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_54_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_52_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20269,17 +18053,17 @@ define <4 x i64> @ugt_54_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_54_v4i64: +; BITALG_NOVLX-LABEL: ugt_52_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [52,52,52,52] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_54_v4i64: +; BITALG-LABEL: ugt_52_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20289,13 +18073,13 @@ define <4 x i64> @ugt_54_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_55_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_55_v4i64: +define <4 x i64> @ult_53_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_53_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20315,13 +18099,13 @@ define <4 x i64> @ult_55_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_55_v4i64: +; AVX2-LABEL: ult_53_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20333,19 +18117,19 @@ define <4 x i64> @ult_55_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_55_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_53_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_55_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_53_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20353,17 +18137,17 @@ define <4 x i64> @ult_55_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_55_v4i64: +; BITALG_NOVLX-LABEL: ult_53_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_55_v4i64: +; BITALG-LABEL: ult_53_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20373,13 +18157,13 @@ define <4 x i64> @ult_55_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_55_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_55_v4i64: +define <4 x i64> @ugt_53_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_53_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20399,13 +18183,13 @@ define <4 x i64> @ugt_55_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [53,53] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq 
%xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_55_v4i64: +; AVX2-LABEL: ugt_53_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20417,19 +18201,19 @@ define <4 x i64> @ugt_55_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_55_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_53_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_55_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_53_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20437,17 +18221,17 @@ define <4 x i64> @ugt_55_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_55_v4i64: +; BITALG_NOVLX-LABEL: ugt_53_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [53,53,53,53] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_55_v4i64: +; BITALG-LABEL: ugt_53_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20457,13 +18241,13 @@ define <4 x i64> @ugt_55_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_56_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_56_v4i64: +define <4 x i64> @ult_54_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_54_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20483,13 +18267,13 @@ define <4 x i64> @ult_56_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_56_v4i64: +; AVX2-LABEL: ult_54_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20501,19 +18285,19 @@ define <4 x i64> 
@ult_56_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_56_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_54_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_56_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_54_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20521,17 +18305,17 @@ define <4 x i64> @ult_56_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_56_v4i64: +; BITALG_NOVLX-LABEL: ult_54_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_56_v4i64: +; BITALG-LABEL: ult_54_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20541,13 +18325,13 @@ define <4 x i64> @ult_56_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_56_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_56_v4i64: +define <4 x i64> @ugt_54_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_54_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20567,13 +18351,13 @@ define <4 x i64> @ugt_56_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [54,54] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_56_v4i64: +; AVX2-LABEL: ugt_54_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20585,19 +18369,19 @@ define <4 x i64> @ugt_56_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_56_v4i64: +; 
AVX512VPOPCNTDQ-LABEL: ugt_54_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_56_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_54_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20605,17 +18389,17 @@ define <4 x i64> @ugt_56_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_56_v4i64: +; BITALG_NOVLX-LABEL: ugt_54_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [54,54,54,54] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_56_v4i64: +; BITALG-LABEL: ugt_54_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20625,13 +18409,13 @@ define <4 x i64> @ugt_56_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_57_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_57_v4i64: +define <4 x i64> @ult_55_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_55_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20651,13 +18435,13 @@ define <4 x i64> @ult_57_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_57_v4i64: +; AVX2-LABEL: ult_55_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20669,19 +18453,19 @@ define <4 x i64> @ult_57_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_57_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_55_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; 
AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_57_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_55_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20689,17 +18473,17 @@ define <4 x i64> @ult_57_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_57_v4i64: +; BITALG_NOVLX-LABEL: ult_55_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_57_v4i64: +; BITALG-LABEL: ult_55_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20709,13 +18493,13 @@ define <4 x i64> @ult_57_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_57_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_57_v4i64: +define <4 x i64> @ugt_55_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_55_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20735,13 +18519,13 @@ define <4 x i64> @ugt_57_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [55,55] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_57_v4i64: +; AVX2-LABEL: ugt_55_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20753,19 +18537,19 @@ define <4 x i64> @ugt_57_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_57_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_55_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_57_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_55_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20773,17 +18557,17 @@ define <4 x i64> @ugt_57_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} 
{z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_57_v4i64: +; BITALG_NOVLX-LABEL: ugt_55_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [55,55,55,55] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_57_v4i64: +; BITALG-LABEL: ugt_55_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20793,13 +18577,13 @@ define <4 x i64> @ugt_57_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_58_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_58_v4i64: +define <4 x i64> @ult_56_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_56_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20819,13 +18603,13 @@ define <4 x i64> @ult_58_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_58_v4i64: +; AVX2-LABEL: ult_56_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20837,19 +18621,19 @@ define <4 x i64> @ult_58_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_58_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_56_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_58_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_56_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20857,17 +18641,17 @@ define <4 x i64> @ult_58_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_58_v4i64: +; BITALG_NOVLX-LABEL: ult_56_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[58,58,58,58] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_58_v4i64: +; BITALG-LABEL: ult_56_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20877,13 +18661,13 @@ define <4 x i64> @ult_58_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_58_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_58_v4i64: +define <4 x i64> @ugt_56_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_56_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20903,13 +18687,13 @@ define <4 x i64> @ugt_58_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [56,56] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_58_v4i64: +; AVX2-LABEL: ugt_56_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -20921,19 +18705,19 @@ define <4 x i64> @ugt_58_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_58_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_56_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_58_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_56_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -20941,17 +18725,17 @@ define <4 x i64> @ugt_58_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_58_v4i64: +; BITALG_NOVLX-LABEL: ugt_56_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [56,56,56,56] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_58_v4i64: +; BITALG-LABEL: ugt_56_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -20961,13 +18745,13 @@ define <4 x i64> @ugt_58_v4i64(<4 x i64> %0) 
{ ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_59_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_59_v4i64: +define <4 x i64> @ult_57_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_57_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -20987,13 +18771,13 @@ define <4 x i64> @ult_59_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_59_v4i64: +; AVX2-LABEL: ult_57_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21005,19 +18789,19 @@ define <4 x i64> @ult_59_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_59_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_57_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_59_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_57_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21025,17 +18809,17 @@ define <4 x i64> @ult_59_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_59_v4i64: +; BITALG_NOVLX-LABEL: ult_57_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_59_v4i64: +; BITALG-LABEL: ult_57_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21045,13 +18829,13 @@ define <4 x i64> @ult_59_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_59_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_59_v4i64: +define <4 x i64> @ugt_57_v4i64(<4 x i64> %0) { +; AVX1-LABEL: 
ugt_57_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21071,13 +18855,13 @@ define <4 x i64> @ugt_59_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [57,57] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_59_v4i64: +; AVX2-LABEL: ugt_57_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21089,19 +18873,19 @@ define <4 x i64> @ugt_59_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_59_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_57_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_59_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_57_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21109,17 +18893,17 @@ define <4 x i64> @ugt_59_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_59_v4i64: +; BITALG_NOVLX-LABEL: ugt_57_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [57,57,57,57] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_59_v4i64: +; BITALG-LABEL: ugt_57_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21129,13 +18913,13 @@ define <4 x i64> @ugt_59_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_60_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_60_v4i64: +define <4 x i64> @ult_58_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_58_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21155,13 +18939,13 @@ define <4 x i64> @ult_60_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} 
xmm1 = [60,60] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_60_v4i64: +; AVX2-LABEL: ult_58_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21173,19 +18957,19 @@ define <4 x i64> @ult_60_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_60_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_58_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_60_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_58_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21193,17 +18977,17 @@ define <4 x i64> @ult_60_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_60_v4i64: +; BITALG_NOVLX-LABEL: ult_58_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_60_v4i64: +; BITALG-LABEL: ult_58_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21213,13 +18997,13 @@ define <4 x i64> @ult_60_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_60_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_60_v4i64: +define <4 x i64> @ugt_58_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_58_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21239,13 +19023,13 @@ define <4 x i64> @ugt_60_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [58,58] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_60_v4i64: +; AVX2-LABEL: ugt_58_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21257,19 +19041,19 @@ define <4 x i64> @ugt_60_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_60_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_58_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_60_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_58_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21277,17 +19061,17 @@ define <4 x i64> @ugt_60_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_60_v4i64: +; BITALG_NOVLX-LABEL: ugt_58_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [58,58,58,58] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_60_v4i64: +; BITALG-LABEL: ugt_58_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21297,13 +19081,13 @@ define <4 x i64> @ugt_60_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_61_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_61_v4i64: +define <4 x i64> @ult_59_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_59_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21323,13 +19107,13 @@ define <4 x i64> @ult_61_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_61_v4i64: +; AVX2-LABEL: ult_59_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21341,19 +19125,19 @@ define <4 x i64> @ult_61_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] 
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_61_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_59_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_61_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_59_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21361,17 +19145,17 @@ define <4 x i64> @ult_61_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_61_v4i64: +; BITALG_NOVLX-LABEL: ult_59_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_61_v4i64: +; BITALG-LABEL: ult_59_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21381,13 +19165,13 @@ define <4 x i64> @ult_61_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_61_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_61_v4i64: +define <4 x i64> @ugt_59_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_59_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21407,13 +19191,13 @@ define <4 x i64> @ugt_61_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [59,59] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_61_v4i64: +; AVX2-LABEL: ugt_59_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21425,19 +19209,19 @@ define <4 x i64> @ugt_61_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_61_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_59_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} 
ymm1 = [61,61,61,61] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_61_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_59_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21445,17 +19229,17 @@ define <4 x i64> @ugt_61_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_61_v4i64: +; BITALG_NOVLX-LABEL: ugt_59_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [59,59,59,59] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_61_v4i64: +; BITALG-LABEL: ugt_59_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21465,13 +19249,13 @@ define <4 x i64> @ugt_61_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_62_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_62_v4i64: +define <4 x i64> @ult_60_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_60_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21491,13 +19275,13 @@ define <4 x i64> @ult_62_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_62_v4i64: +; AVX2-LABEL: ult_60_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21509,19 +19293,19 @@ define <4 x i64> @ult_62_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_62_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_60_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_62_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_60_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq 
{{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21529,17 +19313,17 @@ define <4 x i64> @ult_62_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_62_v4i64: +; BITALG_NOVLX-LABEL: ult_60_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_62_v4i64: +; BITALG-LABEL: ult_60_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21549,13 +19333,13 @@ define <4 x i64> @ult_62_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_62_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_62_v4i64: +define <4 x i64> @ugt_60_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_60_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21575,13 +19359,13 @@ define <4 x i64> @ugt_62_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [60,60] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_62_v4i64: +; AVX2-LABEL: ugt_60_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21593,19 +19377,19 @@ define <4 x i64> @ugt_62_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_62_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_60_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_62_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_60_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21613,17 +19397,17 @@ define <4 x i64> @ugt_62_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_62_v4i64: +; BITALG_NOVLX-LABEL: ugt_60_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb 
%zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [60,60,60,60] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_62_v4i64: +; BITALG-LABEL: ugt_60_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21633,13 +19417,13 @@ define <4 x i64> @ugt_62_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_63_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_63_v4i64: +define <4 x i64> @ult_61_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_61_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21659,13 +19443,13 @@ define <4 x i64> @ult_63_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_63_v4i64: +; AVX2-LABEL: ult_61_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21677,19 +19461,19 @@ define <4 x i64> @ult_63_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_63_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_61_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_63_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_61_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21697,17 +19481,17 @@ define <4 x i64> @ult_63_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_63_v4i64: +; BITALG_NOVLX-LABEL: ult_61_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_63_v4i64: +; BITALG-LABEL: ult_61_v4i64: ; 
BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21717,13 +19501,13 @@ define <4 x i64> @ult_63_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_63_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_63_v4i64: +define <4 x i64> @ugt_61_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_61_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21743,13 +19527,13 @@ define <4 x i64> @ugt_63_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [61,61] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_63_v4i64: +; AVX2-LABEL: ugt_61_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21761,19 +19545,19 @@ define <4 x i64> @ugt_63_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_63_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_61_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_63_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_61_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21781,17 +19565,17 @@ define <4 x i64> @ugt_63_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_63_v4i64: +; BITALG_NOVLX-LABEL: ugt_61_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [61,61,61,61] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_63_v4i64: +; BITALG-LABEL: ugt_61_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21801,13 +19585,13 @@ define <4 x i64> @ugt_63_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x 
i64> ret <4 x i64> %4 } -define <4 x i64> @ult_64_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_64_v4i64: +define <4 x i64> @ult_62_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_62_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21827,13 +19611,13 @@ define <4 x i64> @ult_64_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ult_64_v4i64: +; AVX2-LABEL: ult_62_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21845,19 +19629,19 @@ define <4 x i64> @ult_64_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_64_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_62_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_64_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_62_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21865,17 +19649,17 @@ define <4 x i64> @ult_64_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_64_v4i64: +; BITALG_NOVLX-LABEL: ult_62_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_64_v4i64: +; BITALG-LABEL: ult_62_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21885,13 +19669,13 @@ define <4 x i64> @ult_64_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ugt_64_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ugt_64_v4i64: +define <4 x i64> @ugt_62_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ugt_62_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21911,13 +19695,13 @@ define <4 x i64> @ugt_64_v4i64(<4 x i64> 
%0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [64,64] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [62,62] ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: ugt_64_v4i64: +; AVX2-LABEL: ugt_62_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -21929,19 +19713,19 @@ define <4 x i64> @ugt_64_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_64_v4i64: +; AVX512VPOPCNTDQ-LABEL: ugt_62_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ugt_64_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ugt_62_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpnleuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -21949,17 +19733,17 @@ define <4 x i64> @ugt_64_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ugt_64_v4i64: +; BITALG_NOVLX-LABEL: ugt_62_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [64,64,64,64] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [62,62,62,62] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ugt_64_v4i64: +; BITALG-LABEL: ugt_62_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -21969,13 +19753,13 @@ define <4 x i64> @ugt_64_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ugt <4 x i64> %2, + %3 = icmp ugt <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } -define <4 x i64> @ult_65_v4i64(<4 x i64> %0) { -; AVX1-LABEL: ult_65_v4i64: +define <4 x i64> @ult_63_v4i64(<4 x i64> %0) { +; AVX1-LABEL: ult_63_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2 @@ -21995,13 +19779,13 @@ define <4 x i64> @ult_65_v4i64(<4 x i64> %0) { ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0 ; AVX1-NEXT: vpsadbw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [65,65] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; 
AVX2-LABEL: ult_65_v4i64: +; AVX2-LABEL: ult_63_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -22013,19 +19797,19 @@ define <4 x i64> @ult_65_v4i64(<4 x i64> %0) { ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [65,65,65,65] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_65_v4i64: +; AVX512VPOPCNTDQ-LABEL: ult_63_v4i64: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [65,65,65,65] +; AVX512VPOPCNTDQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; AVX512VPOPCNTDQ-NEXT: retq ; -; AVX512VPOPCNTDQVL-LABEL: ult_65_v4i64: +; AVX512VPOPCNTDQVL-LABEL: ult_63_v4i64: ; AVX512VPOPCNTDQVL: # %bb.0: ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0 ; AVX512VPOPCNTDQVL-NEXT: vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1 @@ -22033,17 +19817,17 @@ define <4 x i64> @ult_65_v4i64(<4 x i64> %0) { ; AVX512VPOPCNTDQVL-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512VPOPCNTDQVL-NEXT: retq ; -; BITALG_NOVLX-LABEL: ult_65_v4i64: +; BITALG_NOVLX-LABEL: ult_63_v4i64: ; BITALG_NOVLX: # %bb.0: ; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 -; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [65,65,65,65] +; BITALG_NOVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [63,63,63,63] ; BITALG_NOVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; BITALG_NOVLX-NEXT: retq ; -; BITALG-LABEL: ult_65_v4i64: +; BITALG-LABEL: ult_63_v4i64: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %ymm0, %ymm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -22053,7 +19837,7 @@ define <4 x i64> @ult_65_v4i64(<4 x i64> %0) { ; BITALG-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %0) - %3 = icmp ult <4 x i64> %2, + %3 = icmp ult <4 x i64> %2, %4 = sext <4 x i1> %3 to <4 x i64> ret <4 x i64> %4 } diff --git a/llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll index 939dabd..bea76ef 100644 --- a/llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll @@ -6,193 +6,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=AVX512 --check-prefix=BITALG -define <64 x i8> @ult_0_v64i8(<64 x i8> %0) { -; AVX512-LABEL: ult_0_v64i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: retq - %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0) - %3 = icmp ult <64 x i8> %2, - %4 = sext <64 x i1> %3 to <64 x i8> - ret <64 x i8> %4 -} - -define <64 x i8> @ugt_0_v64i8(<64 x i8> %0) { -; AVX512F-LABEL: ugt_0_v64i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: 
vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_0_v64i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vptestmb %zmm0, %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_0_v64i8: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_0_v64i8: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vptestmb %zmm0, %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2b %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ugt_0_v64i8: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vptestmb %zmm0, %zmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0) - %3 = icmp ugt <64 x i8> %2, - %4 = sext <64 x i1> %3 to <64 x i8> - ret <64 x i8> %4 -} - -define <64 x i8> @ult_1_v64i8(<64 x i8> %0) { -; AVX512F-LABEL: ult_1_v64i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_1_v64i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_1_v64i8: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; 
AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ult_1_v64i8: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2b %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ult_1_v64i8: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vptestnmb %zmm0, %zmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0) - %3 = icmp ult <64 x i8> %2, - %4 = sext <64 x i1> %3 to <64 x i8> - ret <64 x i8> %4 -} - define <64 x i8> @ugt_1_v64i8(<64 x i8> %0) { ; AVX512F-LABEL: ugt_1_v64i8: ; AVX512F: # %bb.0: @@ -1219,99 +1032,124 @@ define <64 x i8> @ult_7_v64i8(<64 x i8> %0) { ret <64 x i8> %4 } -define <64 x i8> @ugt_7_v64i8(<64 x i8> %0) { -; AVX512F-LABEL: ugt_7_v64i8: +define <32 x i16> @ugt_1_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_1_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpmaxub %ymm1, %ymm0, %ymm3 -; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpmaxub %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vextracti64x4 $1, 
%zmm0, %ymm1 +; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm3 +; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm2 +; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_7_v64i8: +; AVX512BW-LABEL: ugt_1_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleub {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm1 +; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_7_v64i8: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_1_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmaxub %ymm1, %ymm0, %ymm3 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmaxub %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddw %ymm2, %ymm1, %ymm3 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddw %ymm2, %ymm0, %ymm2 +; AVX512VPOPCNTDQ-NOBW-NEXT: 
vpand %ymm2, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_7_v64i8: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_1_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleub {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512VPOPCNTDQ-BW-NEXT: vpaddw %zmm1, %zmm0, %zmm1 +; AVX512VPOPCNTDQ-BW-NEXT: vptestmw %zmm1, %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_7_v64i8: +; BITALG-LABEL: ugt_1_v32i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %zmm0 +; BITALG-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq - %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0) - %3 = icmp ugt <64 x i8> %2, - %4 = sext <64 x i1> %3 to <64 x i8> - ret <64 x i8> %4 + %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) + %3 = icmp ugt <32 x i16> %2, + %4 = sext <32 x i1> %3 to <32 x i16> + ret <32 x i16> %4 +} + +define <32 x i16> @ult_2_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_2_v32i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm3 +; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm2 +; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: ult_2_v32i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm1 +; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_2_v32i16: +; AVX512VPOPCNTDQ-NOBW: # %bb.0: +; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddw %ymm2, %ymm1, %ymm3 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddw %ymm2, %ymm0, %ymm2 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm2, %ymm0, %ymm0 +; 
AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: retq +; +; AVX512VPOPCNTDQ-BW-LABEL: ult_2_v32i16: +; AVX512VPOPCNTDQ-BW: # %bb.0: +; AVX512VPOPCNTDQ-BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512VPOPCNTDQ-BW-NEXT: vpaddw %zmm1, %zmm0, %zmm1 +; AVX512VPOPCNTDQ-BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: retq +; +; BITALG-LABEL: ult_2_v32i16: +; BITALG: # %bb.0: +; BITALG-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %zmm0 +; BITALG-NEXT: retq + %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) + %3 = icmp ult <32 x i16> %2, + %4 = sext <32 x i1> %3 to <32 x i16> + ret <32 x i16> %4 } -define <64 x i8> @ult_8_v64i8(<64 x i8> %0) { -; AVX512F-LABEL: ult_8_v64i8: +define <32 x i16> @ugt_2_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_2_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1321,6 +1159,9 @@ define <64 x i8> @ult_8_v64i8(<64 x i8> %0) { ; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 ; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 ; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 +; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 ; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 @@ -1328,15 +1169,16 @@ define <64 x i8> @ult_8_v64i8(<64 x i8> %0) { ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX512F-NEXT: vpminub %ymm1, %ymm0, %ymm3 -; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpminub %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 +; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_8_v64i8: +; AVX512BW-LABEL: ugt_2_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1346,36 +1188,29 @@ define <64 x i8> @ult_8_v64i8(<64 x i8> %0) { ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltub {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_8_v64i8: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_2_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; 
AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpminub %ymm1, %ymm0, %ymm3 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpminub %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_8_v64i8: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_2_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1385,24 +1220,27 @@ define <64 x i8> @ult_8_v64i8(<64 x i8> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltub {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 +; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_8_v64i8: +; BITALG-LABEL: ugt_2_v32i16: ; BITALG: # 
%bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %zmm0 +; BITALG-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq - %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0) - %3 = icmp ult <64 x i8> %2, - %4 = sext <64 x i1> %3 to <64 x i8> - ret <64 x i8> %4 + %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) + %3 = icmp ugt <32 x i16> %2, + %4 = sext <32 x i1> %3 to <32 x i16> + ret <32 x i16> %4 } -define <64 x i8> @ugt_8_v64i8(<64 x i8> %0) { -; AVX512F-LABEL: ugt_8_v64i8: +define <32 x i16> @ult_3_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_3_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1412,6 +1250,9 @@ define <64 x i8> @ugt_8_v64i8(<64 x i8> %0) { ; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 ; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 ; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 +; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 ; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 @@ -1419,15 +1260,16 @@ define <64 x i8> @ugt_8_v64i8(<64 x i8> %0) { ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; AVX512F-NEXT: vpmaxub %ymm1, %ymm0, %ymm3 -; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpmaxub %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 +; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_8_v64i8: +; AVX512BW-LABEL: ult_3_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1437,36 +1279,29 @@ define <64 x i8> @ugt_8_v64i8(<64 x i8> %0) { ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleub {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 +; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_8_v64i8: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_3_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmaxub %ymm1, %ymm0, %ymm3 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmaxub %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_8_v64i8: +; AVX512VPOPCNTDQ-BW-LABEL: ult_3_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1476,24 +1311,27 @@ define <64 x i8> @ugt_8_v64i8(<64 x i8> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleub {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 +; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_8_v64i8: +; BITALG-LABEL: ult_3_v32i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleub {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %zmm0 +; BITALG-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; 
BITALG-NEXT: retq - %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0) - %3 = icmp ugt <64 x i8> %2, - %4 = sext <64 x i1> %3 to <64 x i8> - ret <64 x i8> %4 + %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) + %3 = icmp ult <32 x i16> %2, + %4 = sext <32 x i1> %3 to <32 x i16> + ret <32 x i16> %4 } -define <64 x i8> @ult_9_v64i8(<64 x i8> %0) { -; AVX512F-LABEL: ult_9_v64i8: +define <32 x i16> @ugt_3_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_3_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1503,6 +1341,9 @@ define <64 x i8> @ult_9_v64i8(<64 x i8> %0) { ; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 ; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 ; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 +; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 ; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 @@ -1510,15 +1351,16 @@ define <64 x i8> @ult_9_v64i8(<64 x i8> %0) { ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpminub %ymm1, %ymm0, %ymm3 -; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpminub %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 +; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_9_v64i8: +; AVX512BW-LABEL: ugt_3_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1528,36 +1370,29 @@ define <64 x i8> @ult_9_v64i8(<64 x i8> %0) { ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltub {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_9_v64i8: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_3_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb 
%ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpminub %ymm1, %ymm0, %ymm3 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpminub %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_9_v64i8: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_3_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1567,35 +1402,27 @@ define <64 x i8> @ult_9_v64i8(<64 x i8> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltub {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2b %k0, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 +; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_9_v64i8: +; BITALG-LABEL: ugt_3_v32i16: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltub {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2b %k0, %zmm0 +; BITALG-NEXT: vpopcntw %zmm0, %zmm0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq - %2 = tail call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %0) - %3 = icmp ult <64 x i8> %2, - %4 = sext <64 x i1> %3 to <64 x i8> - ret <64 x i8> %4 -} - -define <32 x i16> @ult_0_v32i16(<32 x i16> %0) { -; AVX512-LABEL: ult_0_v32i16: -; AVX512: # %bb.0: 
-; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_0_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_0_v32i16: +define <32 x i16> @ult_4_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_4_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1618,14 +1445,13 @@ define <32 x i16> @ugt_0_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_0_v32i16: +; AVX512BW-LABEL: ult_4_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1638,11 +1464,11 @@ define <32 x i16> @ugt_0_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vptestmw %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_0_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_4_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -1651,14 +1477,13 @@ define <32 x i16> @ugt_0_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_0_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_4_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 
{{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1671,24 +1496,24 @@ define <32 x i16> @ugt_0_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vptestmw %zmm0, %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_0_v32i16: +; BITALG-LABEL: ult_4_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vptestmw %zmm0, %zmm0, %k0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_1_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_1_v32i16: +define <32 x i16> @ugt_4_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_4_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1711,13 +1536,13 @@ define <32 x i16> @ult_1_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_1_v32i16: +; AVX512BW-LABEL: ugt_4_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1730,11 +1555,11 @@ define <32 x i16> @ult_1_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_1_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_4_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -1743,13 +1568,13 @@ define <32 x i16> @ult_1_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_1_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_4_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1762,140 +1587,24 @@ define <32 x i16> @ult_1_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vptestnmw %zmm0, %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ult_1_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vptestnmw %zmm0, %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ugt_1_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_1_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm2 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_1_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_1_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddw %ymm2, %ymm1, %ymm3 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddw %ymm2, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpternlogq $15, %zmm0, 
%zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_1_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddw %zmm1, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vptestmw %zmm1, %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_1_v32i16: +; BITALG-LABEL: ugt_4_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ult_2_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_2_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpaddw %ymm2, %ymm0, %ymm2 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_2_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_2_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddw %ymm2, %ymm1, %ymm3 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpaddw %ymm2, %ymm0, %ymm2 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ult_2_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddw %zmm1, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ult_2_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_2_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_2_v32i16: +define <32 x i16> @ult_5_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_5_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -1918,13 +1627,13 @@ define <32 x i16> @ugt_2_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw 
$8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_2_v32i16: +; AVX512BW-LABEL: ult_5_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1937,11 +1646,11 @@ define <32 x i16> @ugt_2_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_2_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_5_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -1950,13 +1659,13 @@ define <32 x i16> @ugt_2_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_2_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_5_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -1969,24 +1678,24 @@ define <32 x i16> @ugt_2_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_2_v32i16: +; BITALG-LABEL: 
ult_5_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_3_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_3_v32i16: +define <32 x i16> @ugt_5_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_5_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2009,13 +1718,13 @@ define <32 x i16> @ult_3_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] -; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_3_v32i16: +; AVX512BW-LABEL: ugt_5_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2028,11 +1737,11 @@ define <32 x i16> @ult_3_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 +; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_3_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_5_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2041,13 +1750,13 @@ define <32 x i16> @ult_3_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_3_v32i16: 
+; AVX512VPOPCNTDQ-BW-LABEL: ugt_5_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2060,24 +1769,24 @@ define <32 x i16> @ult_3_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 +; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_3_v32i16: +; BITALG-LABEL: ugt_5_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 +; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_3_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_3_v32i16: +define <32 x i16> @ult_6_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_6_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2100,13 +1809,13 @@ define <32 x i16> @ugt_3_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_3_v32i16: +; AVX512BW-LABEL: ult_6_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2119,11 +1828,11 @@ define <32 x i16> @ugt_3_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 +; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_3_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_6_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2132,104 +1841,13 @@ define <32 x i16> @ugt_3_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_3_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ugt_3_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ult_4_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_4_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] -; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; 
AVX512BW-LABEL: ult_4_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_4_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_4_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_6_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2246,20 +1864,20 @@ define <32 x i16> @ult_4_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_4_v32i16: +; BITALG-LABEL: ult_6_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_4_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_4_v32i16: +define <32 x i16> @ugt_6_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_6_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2282,13 +1900,13 @@ define <32 x i16> @ugt_4_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_4_v32i16: +; AVX512BW-LABEL: ugt_6_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2305,7 +1923,7 @@ define <32 x i16> @ugt_4_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_4_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_6_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2314,13 +1932,13 @@ define <32 x i16> @ugt_4_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_4_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_6_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2337,20 +1955,20 @@ define <32 x i16> @ugt_4_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_4_v32i16: +; BITALG-LABEL: ugt_6_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_5_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_5_v32i16: +define <32 x i16> @ult_7_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_7_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2373,13 +1991,13 @@ define <32 x i16> @ult_5_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_5_v32i16: +; AVX512BW-LABEL: ult_7_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2396,7 +2014,7 @@ define <32 x i16> @ult_5_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_5_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_7_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2405,13 +2023,13 @@ define <32 x i16> @ult_5_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_5_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_7_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2428,20 +2046,20 @@ define <32 x i16> @ult_5_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_5_v32i16: +; BITALG-LABEL: ult_7_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_5_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_5_v32i16: +define <32 x i16> @ugt_7_v32i16(<32 x i16> %0) { +; 
AVX512F-LABEL: ugt_7_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2464,13 +2082,13 @@ define <32 x i16> @ugt_5_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_5_v32i16: +; AVX512BW-LABEL: ugt_7_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2487,7 +2105,7 @@ define <32 x i16> @ugt_5_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_5_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_7_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2496,13 +2114,13 @@ define <32 x i16> @ugt_5_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_5_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_7_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2519,20 +2137,20 @@ define <32 x i16> @ugt_5_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_5_v32i16: +; BITALG-LABEL: ugt_7_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_6_v32i16(<32 x i16> %0) { -; 
AVX512F-LABEL: ult_6_v32i16: +define <32 x i16> @ult_8_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_8_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2555,13 +2173,13 @@ define <32 x i16> @ult_6_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_6_v32i16: +; AVX512BW-LABEL: ult_8_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2578,7 +2196,7 @@ define <32 x i16> @ult_6_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_6_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_8_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2587,13 +2205,13 @@ define <32 x i16> @ult_6_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_6_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_8_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2610,20 +2228,20 @@ define <32 x i16> @ult_6_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_6_v32i16: +; BITALG-LABEL: ult_8_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x 
i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_6_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_6_v32i16: +define <32 x i16> @ugt_8_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_8_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2646,13 +2264,13 @@ define <32 x i16> @ugt_6_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_6_v32i16: +; AVX512BW-LABEL: ugt_8_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2669,7 +2287,7 @@ define <32 x i16> @ugt_6_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_6_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_8_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2678,13 +2296,13 @@ define <32 x i16> @ugt_6_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_6_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_8_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2701,20 +2319,20 @@ define <32 x i16> @ugt_6_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_6_v32i16: +; BITALG-LABEL: ugt_8_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt 
<32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_7_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_7_v32i16: +define <32 x i16> @ult_9_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_9_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2737,13 +2355,13 @@ define <32 x i16> @ult_7_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_7_v32i16: +; AVX512BW-LABEL: ult_9_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2760,7 +2378,7 @@ define <32 x i16> @ult_7_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_7_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_9_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2769,13 +2387,13 @@ define <32 x i16> @ult_7_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_7_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_9_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2792,20 +2410,20 @@ define <32 x i16> @ult_7_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_7_v32i16: +; BITALG-LABEL: ult_9_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq 
%2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_7_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_7_v32i16: +define <32 x i16> @ugt_9_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_9_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2828,13 +2446,13 @@ define <32 x i16> @ugt_7_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_7_v32i16: +; AVX512BW-LABEL: ugt_9_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2851,7 +2469,7 @@ define <32 x i16> @ugt_7_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_7_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_9_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2860,13 +2478,13 @@ define <32 x i16> @ugt_7_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_7_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_9_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2883,20 +2501,20 @@ define <32 x i16> @ugt_7_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_7_v32i16: +; BITALG-LABEL: ugt_9_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw 
{{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_8_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_8_v32i16: +define <32 x i16> @ult_10_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_10_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -2919,13 +2537,13 @@ define <32 x i16> @ult_8_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_8_v32i16: +; AVX512BW-LABEL: ult_10_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2942,7 +2560,7 @@ define <32 x i16> @ult_8_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_8_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_10_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -2951,13 +2569,13 @@ define <32 x i16> @ult_8_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_8_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_10_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -2974,20 +2592,20 @@ define <32 x i16> @ult_8_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_8_v32i16: 
+; BITALG-LABEL: ult_10_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_8_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_8_v32i16: +define <32 x i16> @ugt_10_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_10_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3010,13 +2628,13 @@ define <32 x i16> @ugt_8_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_8_v32i16: +; AVX512BW-LABEL: ugt_10_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3033,7 +2651,7 @@ define <32 x i16> @ugt_8_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_8_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_10_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3042,13 +2660,13 @@ define <32 x i16> @ugt_8_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_8_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_10_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3065,20 +2683,20 @@ define <32 x i16> @ugt_8_v32i16(<32 x i16> %0) 
{ ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_8_v32i16: +; BITALG-LABEL: ugt_10_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_9_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_9_v32i16: +define <32 x i16> @ult_11_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_11_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3101,13 +2719,13 @@ define <32 x i16> @ult_9_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_9_v32i16: +; AVX512BW-LABEL: ult_11_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3124,7 +2742,7 @@ define <32 x i16> @ult_9_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_9_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_11_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3133,13 +2751,13 @@ define <32 x i16> @ult_9_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_9_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_11_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; 
AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3156,20 +2774,20 @@ define <32 x i16> @ult_9_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_9_v32i16: +; BITALG-LABEL: ult_11_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_9_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_9_v32i16: +define <32 x i16> @ugt_11_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_11_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3192,13 +2810,13 @@ define <32 x i16> @ugt_9_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_9_v32i16: +; AVX512BW-LABEL: ugt_11_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3215,7 +2833,7 @@ define <32 x i16> @ugt_9_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_9_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_11_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3224,13 +2842,13 @@ define <32 x i16> @ugt_9_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_9_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_11_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3247,20 +2865,20 @@ define <32 x i16> @ugt_9_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_9_v32i16: +; BITALG-LABEL: ugt_11_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_10_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_10_v32i16: +define <32 x i16> @ult_12_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_12_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3283,13 +2901,13 @@ define <32 x i16> @ult_10_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_10_v32i16: +; AVX512BW-LABEL: ult_12_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3306,7 +2924,7 @@ define <32 x i16> @ult_10_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_10_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_12_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3315,13 +2933,13 @@ define <32 x i16> @ult_10_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; 
AVX512VPOPCNTDQ-BW-LABEL: ult_10_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_12_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3338,20 +2956,20 @@ define <32 x i16> @ult_10_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_10_v32i16: +; BITALG-LABEL: ult_12_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_10_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_10_v32i16: +define <32 x i16> @ugt_12_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_12_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3374,13 +2992,13 @@ define <32 x i16> @ugt_10_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_10_v32i16: +; AVX512BW-LABEL: ugt_12_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3397,7 +3015,7 @@ define <32 x i16> @ugt_10_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_10_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_12_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3406,13 +3024,13 @@ define <32 x i16> @ugt_10_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; 
AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_10_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_12_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3429,20 +3047,20 @@ define <32 x i16> @ugt_10_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_10_v32i16: +; BITALG-LABEL: ugt_12_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_11_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_11_v32i16: +define <32 x i16> @ult_13_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_13_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3465,13 +3083,13 @@ define <32 x i16> @ult_11_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_11_v32i16: +; AVX512BW-LABEL: ult_13_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3488,7 +3106,7 @@ define <32 x i16> @ult_11_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_11_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_13_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3497,13 +3115,13 @@ define <32 x i16> @ult_11_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] +; 
AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_11_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_13_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3520,20 +3138,20 @@ define <32 x i16> @ult_11_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_11_v32i16: +; BITALG-LABEL: ult_13_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_11_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_11_v32i16: +define <32 x i16> @ugt_13_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_13_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3556,13 +3174,13 @@ define <32 x i16> @ugt_11_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_11_v32i16: +; AVX512BW-LABEL: ugt_13_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3579,7 +3197,7 @@ define <32 x i16> @ugt_11_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_11_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_13_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3588,13 +3206,13 @@ define <32 x i16> @ugt_11_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; 
AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_11_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_13_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3611,20 +3229,20 @@ define <32 x i16> @ugt_11_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_11_v32i16: +; BITALG-LABEL: ugt_13_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_12_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_12_v32i16: +define <32 x i16> @ult_14_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_14_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3647,13 +3265,13 @@ define <32 x i16> @ult_12_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_12_v32i16: +; AVX512BW-LABEL: ult_14_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3670,7 +3288,7 @@ define <32 x i16> @ult_12_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_12_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_14_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3679,13 +3297,13 @@ define <32 x i16> @ult_12_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_12_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_14_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3702,20 +3320,20 @@ define <32 x i16> @ult_12_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_12_v32i16: +; BITALG-LABEL: ult_14_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_12_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_12_v32i16: +define <32 x i16> @ugt_14_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ugt_14_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3738,13 +3356,13 @@ define <32 x i16> @ugt_12_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_12_v32i16: +; AVX512BW-LABEL: ugt_14_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3761,7 +3379,7 @@ define <32 x i16> @ugt_12_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_12_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_14_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; 
AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3770,13 +3388,13 @@ define <32 x i16> @ugt_12_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_12_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ugt_14_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3793,20 +3411,20 @@ define <32 x i16> @ugt_12_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ugt_12_v32i16: +; BITALG-LABEL: ugt_14_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, + %3 = icmp ugt <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ult_13_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_13_v32i16: +define <32 x i16> @ult_15_v32i16(<32 x i16> %0) { +; AVX512F-LABEL: ult_15_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 @@ -3829,13 +3447,13 @@ define <32 x i16> @ult_13_v32i16(<32 x i16> %0) { ; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_13_v32i16: +; AVX512BW-LABEL: ult_15_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3852,7 +3470,7 @@ define <32 x i16> @ult_13_v32i16(<32 x i16> %0) { ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_13_v32i16: +; AVX512VPOPCNTDQ-NOBW-LABEL: ult_15_v32i16: ; AVX512VPOPCNTDQ-NOBW: # %bb.0: ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 @@ -3861,13 +3479,13 @@ define <32 x i16> @ult_13_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] +; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VPOPCNTDQ-NOBW-NEXT: retq ; -; AVX512VPOPCNTDQ-BW-LABEL: ult_13_v32i16: +; AVX512VPOPCNTDQ-BW-LABEL: ult_15_v32i16: ; AVX512VPOPCNTDQ-BW: # %bb.0: ; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -3884,1465 +3502,94 @@ define <32 x i16> @ult_13_v32i16(<32 x i16> %0) { ; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512VPOPCNTDQ-BW-NEXT: retq ; -; BITALG-LABEL: ult_13_v32i16: +; BITALG-LABEL: ult_15_v32i16: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntw %zmm0, %zmm0 ; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 ; BITALG-NEXT: vpmovm2w %k0, %zmm0 ; BITALG-NEXT: retq %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, + %3 = icmp ult <32 x i16> %2, %4 = sext <32 x i1> %3 to <32 x i16> ret <32 x i16> %4 } -define <32 x i16> @ugt_13_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_13_v32i16: +define <16 x i32> @ugt_1_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_1_v16i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 
-; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_13_v32i16: +; AVX512BW-LABEL: ugt_1_v16i32: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 +; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm1 +; AVX512BW-NEXT: vptestmd %zmm1, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_13_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_13_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, 
%zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ugt_13_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ult_14_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_14_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] -; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_14_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_14_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; 
AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ult_14_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ult_14_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ugt_14_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_14_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] -; AVX512F-NEXT: vpcmpgtw %ymm1, 
%ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_14_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_14_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_14_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ugt_14_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: 
vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ult_15_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_15_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_15_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_15_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; 
AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ult_15_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ult_15_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ugt_15_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_15_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_15_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_15_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_15_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ugt_15_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, - %4 = sext <32 x 
i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ult_16_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_16_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_16_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_16_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw 
%ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ult_16_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ult_16_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ugt_16_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ugt_16_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_16_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ugt_16_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ugt_16_v32i16: -; AVX512VPOPCNTDQ-BW: # %bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ugt_16_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ugt <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <32 x i16> @ult_17_v32i16(<32 x i16> %0) { -; AVX512F-LABEL: ult_17_v32i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm1, %ymm4, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsllw $8, %ymm2, %ymm4 -; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 -; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm4 -; AVX512F-NEXT: vpshufb %ymm4, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm3, %ymm0 -; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm1 -; AVX512F-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17] -; AVX512F-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_17_v32i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-NOBW-LABEL: ult_17_v32i16: -; AVX512VPOPCNTDQ-NOBW: # %bb.0: -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VPOPCNTDQ-NOBW-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17] -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; AVX512VPOPCNTDQ-NOBW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-NOBW-NEXT: retq -; -; AVX512VPOPCNTDQ-BW-LABEL: ult_17_v32i16: -; AVX512VPOPCNTDQ-BW: # 
%bb.0: -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsllw $8, %zmm0, %zmm1 -; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; AVX512VPOPCNTDQ-BW-NEXT: vpmovm2w %k0, %zmm0 -; AVX512VPOPCNTDQ-BW-NEXT: retq -; -; BITALG-LABEL: ult_17_v32i16: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntw %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltuw {{.*}}(%rip), %zmm0, %k0 -; BITALG-NEXT: vpmovm2w %k0, %zmm0 -; BITALG-NEXT: retq - %2 = tail call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %0) - %3 = icmp ult <32 x i16> %2, - %4 = sext <32 x i1> %3 to <32 x i16> - ret <32 x i16> %4 -} - -define <16 x i32> @ult_0_v16i32(<16 x i32> %0) { -; AVX512-LABEL: ult_0_v16i32: -; AVX512: # %bb.0: -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ugt_0_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_0_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, 
%zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_0_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v16i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ugt_0_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; BITALG-NEXT: vptestmd %zmm0, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ult_1_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_1_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = 
ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_1_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_1_v16i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vptestnmd %zmm0, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ult_1_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; BITALG-NEXT: vptestnmd %zmm0, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - 
-define <16 x i32> @ugt_1_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_1_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_1_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vptestmd %zmm1, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ugt_1_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; BITALG-NEXT: vpaddd %zmm1, %zmm0, %zmm1 -; BITALG-NEXT: vptestmd %zmm1, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ult_2_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_2_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm1 -; AVX512F-NEXT: vptestnmd %zmm1, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_2_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vptestnmd %zmm1, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_2_v16i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ult_2_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 -; BITALG-NEXT: vpaddd %zmm1, %zmm0, %zmm1 -; BITALG-NEXT: vptestnmd %zmm1, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ugt_2_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_2_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 -; 
AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_2_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ugt_2_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> 
%0) - %3 = icmp ugt <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ult_3_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_3_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_3_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_3_v16i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpltud 
{{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ult_3_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ugt_3_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_3_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_3_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq 
%zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ugt_3_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ult_4_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_4_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = 
ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_4_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_4_v16i32: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ult_4_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ugt_4_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_4_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, 
%ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_4_v16i32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ugt_4_v16i32: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 -; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, - %4 = sext <16 x i1> %3 to <16 x i32> - ret <16 x i32> %4 -} - -define <16 x i32> @ult_5_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_5_v16i32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 +; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512VPOPCNTDQ-NEXT: retq +; +; BITALG-LABEL: ugt_1_v16i32: +; BITALG: # %bb.0: +; BITALG-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; BITALG-NEXT: vpaddd %zmm1, %zmm0, %zmm1 +; BITALG-NEXT: vptestmd %zmm1, %zmm0, %k1 +; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; BITALG-NEXT: retq + %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) + %3 = icmp ugt <16 x i32> %2, + %4 = sext <16 x i1> %3 to <16 x i32> + ret <16 x i32> %4 +} + +define <16 x i32> @ult_2_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_2_v16i32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm1 +; AVX512F-NEXT: vptestnmd %zmm1, %zmm0, %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_5_v16i32: +; AVX512BW-LABEL: ult_2_v16i32: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; 
AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm1 +; AVX512BW-NEXT: vptestnmd %zmm1, %zmm0, %k1 ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_5_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_2_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_5_v16i32: +; BITALG-LABEL: ult_2_v16i32: ; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 -; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 +; BITALG-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; BITALG-NEXT: vpaddd %zmm1, %zmm0, %zmm1 +; BITALG-NEXT: vptestnmd %zmm1, %zmm0, %k1 ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_5_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_5_v16i32: +define <16 x i32> @ugt_2_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_2_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -5375,7 +3622,7 @@ define <16 x i32> @ugt_5_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_5_v16i32: +; AVX512BW-LABEL: ugt_2_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 
@@ -5395,14 +3642,14 @@ define <16 x i32> @ugt_5_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_5_v16i32: +; BITALG-LABEL: ugt_2_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5415,13 +3662,13 @@ define <16 x i32> @ugt_5_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_6_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_6_v16i32: +define <16 x i32> @ult_3_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_3_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -5454,7 +3701,7 @@ define <16 x i32> @ult_6_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_6_v16i32: +; AVX512BW-LABEL: ult_3_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -5474,14 +3721,14 @@ define <16 x i32> @ult_6_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_6_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_3_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_6_v16i32: +; BITALG-LABEL: ult_3_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5494,13 +3741,13 @@ define <16 x i32> @ult_6_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_6_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_6_v16i32: +define <16 x i32> @ugt_3_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_3_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -5533,7 +3780,7 @@ define <16 x i32> @ugt_6_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_6_v16i32: +; AVX512BW-LABEL: ugt_3_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -5553,14 +3800,14 @@ define <16 x i32> @ugt_6_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_6_v16i32: +; BITALG-LABEL: ugt_3_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5573,13 +3820,13 @@ define <16 x i32> @ugt_6_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_7_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_7_v16i32: +define <16 x i32> @ult_4_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_4_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -5612,7 +3859,7 @@ define <16 x i32> @ult_7_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_7_v16i32: +; AVX512BW-LABEL: ult_4_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -5632,14 +3879,14 @@ define <16 x i32> @ult_7_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_7_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_4_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_7_v16i32: +; BITALG-LABEL: ult_4_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5652,13 +3899,13 @@ define <16 x i32> @ult_7_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_7_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_7_v16i32: +define <16 x i32> @ugt_4_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_4_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -5691,7 +3938,7 @@ define <16 x i32> @ugt_7_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd 
$255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_7_v16i32: +; AVX512BW-LABEL: ugt_4_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -5711,14 +3958,14 @@ define <16 x i32> @ugt_7_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_7_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_7_v16i32: +; BITALG-LABEL: ugt_4_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5731,13 +3978,13 @@ define <16 x i32> @ugt_7_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_8_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_8_v16i32: +define <16 x i32> @ult_5_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_5_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -5770,7 +4017,7 @@ define <16 x i32> @ult_8_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_8_v16i32: +; AVX512BW-LABEL: ult_5_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -5790,14 +4037,14 @@ define <16 x i32> @ult_8_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_8_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_5_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_8_v16i32: +; BITALG-LABEL: ult_5_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5810,13 +4057,13 @@ define <16 x i32> @ult_8_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_8_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_8_v16i32: +define <16 x i32> @ugt_5_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_5_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -5849,7 +4096,7 @@ define <16 x i32> @ugt_8_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_8_v16i32: +; AVX512BW-LABEL: ugt_5_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -5869,14 +4116,14 @@ define <16 x i32> @ugt_8_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_8_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_8_v16i32: +; BITALG-LABEL: ugt_5_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5889,13 +4136,13 @@ define <16 x i32> @ugt_8_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_9_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_9_v16i32: +define <16 x i32> @ult_6_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_6_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -5928,7 +4175,7 @@ define <16 x i32> @ult_9_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_9_v16i32: +; AVX512BW-LABEL: ult_6_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -5948,14 +4195,14 @@ define <16 x i32> @ult_9_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_9_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_6_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_9_v16i32: +; BITALG-LABEL: ult_6_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -5968,13 +4215,13 @@ define <16 x i32> @ult_9_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_9_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_9_v16i32: 
+define <16 x i32> @ugt_6_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_6_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6007,7 +4254,7 @@ define <16 x i32> @ugt_9_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_9_v16i32: +; AVX512BW-LABEL: ugt_6_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6027,14 +4274,14 @@ define <16 x i32> @ugt_9_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_9_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_9_v16i32: +; BITALG-LABEL: ugt_6_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6047,13 +4294,13 @@ define <16 x i32> @ugt_9_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_10_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_10_v16i32: +define <16 x i32> @ult_7_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_7_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6086,7 +4333,7 @@ define <16 x i32> @ult_10_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_10_v16i32: +; AVX512BW-LABEL: ult_7_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6106,14 +4353,14 @@ define <16 x i32> @ult_10_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_10_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_7_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_10_v16i32: +; BITALG-LABEL: ult_7_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6126,13 +4373,13 @@ define <16 x i32> @ult_10_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = 
icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_10_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_10_v16i32: +define <16 x i32> @ugt_7_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_7_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6165,7 +4412,7 @@ define <16 x i32> @ugt_10_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_10_v16i32: +; AVX512BW-LABEL: ugt_7_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6185,14 +4432,14 @@ define <16 x i32> @ugt_10_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_10_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_7_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_10_v16i32: +; BITALG-LABEL: ugt_7_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6205,13 +4452,13 @@ define <16 x i32> @ugt_10_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_11_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_11_v16i32: +define <16 x i32> @ult_8_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_8_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6244,7 +4491,7 @@ define <16 x i32> @ult_11_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_11_v16i32: +; AVX512BW-LABEL: ult_8_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6264,14 +4511,14 @@ define <16 x i32> @ult_11_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_11_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_8_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_11_v16i32: +; BITALG-LABEL: ult_8_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6284,13 +4531,13 
@@ define <16 x i32> @ult_11_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_11_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_11_v16i32: +define <16 x i32> @ugt_8_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_8_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6323,7 +4570,7 @@ define <16 x i32> @ugt_11_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_11_v16i32: +; AVX512BW-LABEL: ugt_8_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6343,14 +4590,14 @@ define <16 x i32> @ugt_11_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_11_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_8_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_11_v16i32: +; BITALG-LABEL: ugt_8_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6363,13 +4610,13 @@ define <16 x i32> @ugt_11_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_12_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_12_v16i32: +define <16 x i32> @ult_9_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_9_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6402,7 +4649,7 @@ define <16 x i32> @ult_12_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_12_v16i32: +; AVX512BW-LABEL: ult_9_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6422,14 +4669,14 @@ define <16 x i32> @ult_12_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_12_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_9_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; 
AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_12_v16i32: +; BITALG-LABEL: ult_9_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6442,13 +4689,13 @@ define <16 x i32> @ult_12_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_12_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_12_v16i32: +define <16 x i32> @ugt_9_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_9_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6481,7 +4728,7 @@ define <16 x i32> @ugt_12_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_12_v16i32: +; AVX512BW-LABEL: ugt_9_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6501,14 +4748,14 @@ define <16 x i32> @ugt_12_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_12_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_9_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_12_v16i32: +; BITALG-LABEL: ugt_9_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6521,13 +4768,13 @@ define <16 x i32> @ugt_12_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_13_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_13_v16i32: +define <16 x i32> @ult_10_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_10_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6560,7 +4807,7 @@ define <16 x i32> @ult_13_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_13_v16i32: +; AVX512BW-LABEL: ult_10_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6580,14 +4827,14 @@ define <16 x i32> @ult_13_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_13_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_10_v16i32: ; 
AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_13_v16i32: +; BITALG-LABEL: ult_10_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6600,13 +4847,13 @@ define <16 x i32> @ult_13_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_13_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_13_v16i32: +define <16 x i32> @ugt_10_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_10_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6639,7 +4886,7 @@ define <16 x i32> @ugt_13_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_13_v16i32: +; AVX512BW-LABEL: ugt_10_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6659,14 +4906,14 @@ define <16 x i32> @ugt_13_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_13_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_10_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_13_v16i32: +; BITALG-LABEL: ugt_10_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6679,13 +4926,13 @@ define <16 x i32> @ugt_13_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_14_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_14_v16i32: +define <16 x i32> @ult_11_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_11_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6718,7 +4965,7 @@ define <16 x i32> @ult_14_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_14_v16i32: +; AVX512BW-LABEL: ult_11_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6738,14 +4985,14 @@ define 
<16 x i32> @ult_14_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_14_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_11_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_14_v16i32: +; BITALG-LABEL: ult_11_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6758,13 +5005,13 @@ define <16 x i32> @ult_14_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_14_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_14_v16i32: +define <16 x i32> @ugt_11_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_11_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6797,7 +5044,7 @@ define <16 x i32> @ugt_14_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_14_v16i32: +; AVX512BW-LABEL: ugt_11_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6817,14 +5064,14 @@ define <16 x i32> @ugt_14_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_14_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_11_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_14_v16i32: +; BITALG-LABEL: ugt_11_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6837,13 +5084,13 @@ define <16 x i32> @ugt_14_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_15_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_15_v16i32: +define <16 x i32> @ult_12_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_12_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6876,7 +5123,7 @@ define <16 x i32> @ult_15_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_15_v16i32: +; AVX512BW-LABEL: ult_12_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6896,14 +5143,14 @@ define <16 x i32> @ult_15_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_15_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_12_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_15_v16i32: +; BITALG-LABEL: ult_12_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6916,13 +5163,13 @@ define <16 x i32> @ult_15_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_15_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_15_v16i32: +define <16 x i32> @ugt_12_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_12_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -6955,7 +5202,7 @@ define <16 x i32> @ugt_15_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_15_v16i32: +; AVX512BW-LABEL: ugt_12_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -6975,14 +5222,14 @@ define <16 x i32> @ugt_15_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_15_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_12_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_15_v16i32: +; BITALG-LABEL: ugt_12_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6995,13 +5242,13 @@ define <16 x i32> @ugt_15_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_16_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_16_v16i32: +define <16 x i32> @ult_13_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_13_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7034,7 +5281,7 @@ define <16 x i32> @ult_16_v16i32(<16 x i32> %0) { ; 
AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_16_v16i32: +; AVX512BW-LABEL: ult_13_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7054,14 +5301,14 @@ define <16 x i32> @ult_16_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_16_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_13_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_16_v16i32: +; BITALG-LABEL: ult_13_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7074,13 +5321,13 @@ define <16 x i32> @ult_16_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_16_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_16_v16i32: +define <16 x i32> @ugt_13_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_13_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7113,7 +5360,7 @@ define <16 x i32> @ugt_16_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_16_v16i32: +; AVX512BW-LABEL: ugt_13_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7133,14 +5380,14 @@ define <16 x i32> @ugt_16_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_16_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_13_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_16_v16i32: +; BITALG-LABEL: ugt_13_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7153,13 +5400,13 @@ define <16 x i32> @ugt_16_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_17_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_17_v16i32: +define <16 x i32> @ult_14_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_14_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7192,7 +5439,7 @@ define <16 x i32> @ult_17_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_17_v16i32: +; AVX512BW-LABEL: ult_14_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7212,14 +5459,14 @@ define <16 x i32> @ult_17_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_17_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_14_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_17_v16i32: +; BITALG-LABEL: ult_14_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7232,13 +5479,13 @@ define <16 x i32> @ult_17_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_17_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_17_v16i32: +define <16 x i32> @ugt_14_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_14_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7271,7 +5518,7 @@ define <16 x i32> @ugt_17_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_17_v16i32: +; AVX512BW-LABEL: ugt_14_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7291,14 +5538,14 @@ define <16 x i32> @ugt_17_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_17_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_14_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_17_v16i32: +; BITALG-LABEL: ugt_14_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7311,13 +5558,13 @@ define <16 x i32> @ugt_17_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> 
@ult_18_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_18_v16i32: +define <16 x i32> @ult_15_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_15_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7350,7 +5597,7 @@ define <16 x i32> @ult_18_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_18_v16i32: +; AVX512BW-LABEL: ult_15_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7370,14 +5617,14 @@ define <16 x i32> @ult_18_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_18_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_15_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_18_v16i32: +; BITALG-LABEL: ult_15_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7390,13 +5637,13 @@ define <16 x i32> @ult_18_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_18_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_18_v16i32: +define <16 x i32> @ugt_15_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_15_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7429,7 +5676,7 @@ define <16 x i32> @ugt_18_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_18_v16i32: +; AVX512BW-LABEL: ugt_15_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7449,14 +5696,14 @@ define <16 x i32> @ugt_18_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_18_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_15_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_18_v16i32: +; BITALG-LABEL: ugt_15_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7469,13 +5716,13 @@ define <16 x i32> @ugt_18_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; 
BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_19_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_19_v16i32: +define <16 x i32> @ult_16_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_16_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7508,7 +5755,7 @@ define <16 x i32> @ult_19_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_19_v16i32: +; AVX512BW-LABEL: ult_16_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7528,14 +5775,14 @@ define <16 x i32> @ult_19_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_19_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_16_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_19_v16i32: +; BITALG-LABEL: ult_16_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7548,13 +5795,13 @@ define <16 x i32> @ult_19_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_19_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_19_v16i32: +define <16 x i32> @ugt_16_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_16_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7587,7 +5834,7 @@ define <16 x i32> @ugt_19_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_19_v16i32: +; AVX512BW-LABEL: ugt_16_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7607,14 +5854,14 @@ define <16 x i32> @ugt_19_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_19_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_16_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_19_v16i32: +; BITALG-LABEL: ugt_16_v16i32: ; BITALG: # %bb.0: ; 
BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7627,13 +5874,13 @@ define <16 x i32> @ugt_19_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_20_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_20_v16i32: +define <16 x i32> @ult_17_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_17_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7666,7 +5913,7 @@ define <16 x i32> @ult_20_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_20_v16i32: +; AVX512BW-LABEL: ult_17_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7686,14 +5933,14 @@ define <16 x i32> @ult_20_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_20_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_17_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_20_v16i32: +; BITALG-LABEL: ult_17_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7706,13 +5953,13 @@ define <16 x i32> @ult_20_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_20_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_20_v16i32: +define <16 x i32> @ugt_17_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_17_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7745,7 +5992,7 @@ define <16 x i32> @ugt_20_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_20_v16i32: +; AVX512BW-LABEL: ugt_17_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7765,14 +6012,14 @@ define <16 x i32> @ugt_20_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_20_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_17_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud 
{{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_20_v16i32: +; BITALG-LABEL: ugt_17_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7785,13 +6032,13 @@ define <16 x i32> @ugt_20_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_21_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_21_v16i32: +define <16 x i32> @ult_18_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_18_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7824,7 +6071,7 @@ define <16 x i32> @ult_21_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_21_v16i32: +; AVX512BW-LABEL: ult_18_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7844,14 +6091,14 @@ define <16 x i32> @ult_21_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_21_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_18_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_21_v16i32: +; BITALG-LABEL: ult_18_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7864,13 +6111,13 @@ define <16 x i32> @ult_21_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_21_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_21_v16i32: +define <16 x i32> @ugt_18_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_18_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7903,7 +6150,7 @@ define <16 x i32> @ugt_21_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_21_v16i32: +; AVX512BW-LABEL: ugt_18_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -7923,14 +6170,14 @@ define <16 x i32> @ugt_21_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; 
AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_21_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_18_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_21_v16i32: +; BITALG-LABEL: ugt_18_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -7943,13 +6190,13 @@ define <16 x i32> @ugt_21_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_22_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_22_v16i32: +define <16 x i32> @ult_19_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_19_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -7982,7 +6229,7 @@ define <16 x i32> @ult_22_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_22_v16i32: +; AVX512BW-LABEL: ult_19_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8002,14 +6249,14 @@ define <16 x i32> @ult_22_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_22_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_19_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_22_v16i32: +; BITALG-LABEL: ult_19_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8022,13 +6269,13 @@ define <16 x i32> @ult_22_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_22_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_22_v16i32: +define <16 x i32> @ugt_19_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_19_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8061,7 +6308,7 @@ define <16 x i32> @ugt_22_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_22_v16i32: +; AVX512BW-LABEL: ugt_19_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8081,14 +6328,14 @@ define <16 x i32> @ugt_22_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_22_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_19_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_22_v16i32: +; BITALG-LABEL: ugt_19_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8101,13 +6348,13 @@ define <16 x i32> @ugt_22_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_23_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_23_v16i32: +define <16 x i32> @ult_20_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_20_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8140,7 +6387,7 @@ define <16 x i32> @ult_23_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_23_v16i32: +; AVX512BW-LABEL: ult_20_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8160,14 +6407,14 @@ define <16 x i32> @ult_23_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_23_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_20_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_23_v16i32: +; BITALG-LABEL: ult_20_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8180,13 +6427,13 @@ define <16 x i32> @ult_23_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_23_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_23_v16i32: +define <16 x i32> @ugt_20_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_20_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8219,7 +6466,7 @@ define <16 x i32> @ugt_23_v16i32(<16 x i32> %0) { ; 
AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_23_v16i32: +; AVX512BW-LABEL: ugt_20_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8239,14 +6486,14 @@ define <16 x i32> @ugt_23_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_23_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_20_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_23_v16i32: +; BITALG-LABEL: ugt_20_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8259,13 +6506,13 @@ define <16 x i32> @ugt_23_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_24_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_24_v16i32: +define <16 x i32> @ult_21_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_21_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8298,7 +6545,7 @@ define <16 x i32> @ult_24_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_24_v16i32: +; AVX512BW-LABEL: ult_21_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8318,14 +6565,14 @@ define <16 x i32> @ult_24_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_24_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_21_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_24_v16i32: +; BITALG-LABEL: ult_21_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8338,13 +6585,13 @@ define <16 x i32> @ult_24_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_24_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_24_v16i32: +define <16 x i32> @ugt_21_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_21_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8377,7 +6624,7 @@ define <16 x i32> @ugt_24_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_24_v16i32: +; AVX512BW-LABEL: ugt_21_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8397,14 +6644,14 @@ define <16 x i32> @ugt_24_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_24_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_21_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_24_v16i32: +; BITALG-LABEL: ugt_21_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8417,13 +6664,13 @@ define <16 x i32> @ugt_24_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_25_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_25_v16i32: +define <16 x i32> @ult_22_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_22_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8456,7 +6703,7 @@ define <16 x i32> @ult_25_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_25_v16i32: +; AVX512BW-LABEL: ult_22_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8476,14 +6723,14 @@ define <16 x i32> @ult_25_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_25_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_22_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_25_v16i32: +; BITALG-LABEL: ult_22_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8496,13 +6743,13 @@ define <16 x i32> @ult_25_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> 
@ugt_25_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_25_v16i32: +define <16 x i32> @ugt_22_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_22_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8535,7 +6782,7 @@ define <16 x i32> @ugt_25_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_25_v16i32: +; AVX512BW-LABEL: ugt_22_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8555,14 +6802,14 @@ define <16 x i32> @ugt_25_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_25_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_22_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_25_v16i32: +; BITALG-LABEL: ugt_22_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8575,13 +6822,13 @@ define <16 x i32> @ugt_25_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_26_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_26_v16i32: +define <16 x i32> @ult_23_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_23_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8614,7 +6861,7 @@ define <16 x i32> @ult_26_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_26_v16i32: +; AVX512BW-LABEL: ult_23_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8634,14 +6881,14 @@ define <16 x i32> @ult_26_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_26_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_23_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_26_v16i32: +; BITALG-LABEL: ult_23_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8654,13 +6901,13 @@ define <16 x i32> @ult_26_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; 
BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_26_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_26_v16i32: +define <16 x i32> @ugt_23_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_23_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8693,7 +6940,7 @@ define <16 x i32> @ugt_26_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_26_v16i32: +; AVX512BW-LABEL: ugt_23_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8713,14 +6960,14 @@ define <16 x i32> @ugt_26_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_26_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_23_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_26_v16i32: +; BITALG-LABEL: ugt_23_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8733,13 +6980,13 @@ define <16 x i32> @ugt_26_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_27_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_27_v16i32: +define <16 x i32> @ult_24_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_24_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8772,7 +7019,7 @@ define <16 x i32> @ult_27_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_27_v16i32: +; AVX512BW-LABEL: ult_24_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8792,14 +7039,14 @@ define <16 x i32> @ult_27_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_27_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_24_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_27_v16i32: +; BITALG-LABEL: ult_24_v16i32: ; BITALG: # %bb.0: ; 
BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8812,13 +7059,13 @@ define <16 x i32> @ult_27_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_27_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_27_v16i32: +define <16 x i32> @ugt_24_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_24_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8851,7 +7098,7 @@ define <16 x i32> @ugt_27_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_27_v16i32: +; AVX512BW-LABEL: ugt_24_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8871,14 +7118,14 @@ define <16 x i32> @ugt_27_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_27_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_24_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_27_v16i32: +; BITALG-LABEL: ugt_24_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8891,13 +7138,13 @@ define <16 x i32> @ugt_27_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_28_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_28_v16i32: +define <16 x i32> @ult_25_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_25_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -8930,7 +7177,7 @@ define <16 x i32> @ult_28_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_28_v16i32: +; AVX512BW-LABEL: ult_25_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -8950,14 +7197,14 @@ define <16 x i32> @ult_28_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_28_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_25_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud 
{{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_28_v16i32: +; BITALG-LABEL: ult_25_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -8970,13 +7217,13 @@ define <16 x i32> @ult_28_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_28_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_28_v16i32: +define <16 x i32> @ugt_25_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_25_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9009,7 +7256,7 @@ define <16 x i32> @ugt_28_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_28_v16i32: +; AVX512BW-LABEL: ugt_25_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9029,14 +7276,14 @@ define <16 x i32> @ugt_28_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_28_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_25_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_28_v16i32: +; BITALG-LABEL: ugt_25_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9049,13 +7296,13 @@ define <16 x i32> @ugt_28_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_29_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_29_v16i32: +define <16 x i32> @ult_26_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_26_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9088,7 +7335,7 @@ define <16 x i32> @ult_29_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_29_v16i32: +; AVX512BW-LABEL: ult_26_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9108,14 +7355,14 @@ define <16 x i32> @ult_29_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; 
AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_29_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_26_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_29_v16i32: +; BITALG-LABEL: ult_26_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9128,13 +7375,13 @@ define <16 x i32> @ult_29_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_29_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_29_v16i32: +define <16 x i32> @ugt_26_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_26_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9167,7 +7414,7 @@ define <16 x i32> @ugt_29_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_29_v16i32: +; AVX512BW-LABEL: ugt_26_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9187,14 +7434,14 @@ define <16 x i32> @ugt_29_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_29_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_26_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_29_v16i32: +; BITALG-LABEL: ugt_26_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9207,13 +7454,13 @@ define <16 x i32> @ugt_29_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_30_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_30_v16i32: +define <16 x i32> @ult_27_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_27_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9246,7 +7493,7 @@ define <16 x i32> @ult_30_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_30_v16i32: +; AVX512BW-LABEL: ult_27_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9266,14 +7513,14 @@ define <16 x i32> @ult_30_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_30_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_27_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_30_v16i32: +; BITALG-LABEL: ult_27_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9286,13 +7533,13 @@ define <16 x i32> @ult_30_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_30_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_30_v16i32: +define <16 x i32> @ugt_27_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_27_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9325,7 +7572,7 @@ define <16 x i32> @ugt_30_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_30_v16i32: +; AVX512BW-LABEL: ugt_27_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9345,14 +7592,14 @@ define <16 x i32> @ugt_30_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_30_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_27_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_30_v16i32: +; BITALG-LABEL: ugt_27_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9365,13 +7612,13 @@ define <16 x i32> @ugt_30_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_31_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_31_v16i32: +define <16 x i32> @ult_28_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_28_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9404,7 +7651,7 @@ define <16 x i32> @ult_31_v16i32(<16 x i32> %0) { ; 
AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_31_v16i32: +; AVX512BW-LABEL: ult_28_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9424,14 +7671,14 @@ define <16 x i32> @ult_31_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_31_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_28_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_31_v16i32: +; BITALG-LABEL: ult_28_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9444,13 +7691,13 @@ define <16 x i32> @ult_31_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_31_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_31_v16i32: +define <16 x i32> @ugt_28_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_28_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9483,7 +7730,7 @@ define <16 x i32> @ugt_31_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_31_v16i32: +; AVX512BW-LABEL: ugt_28_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9503,14 +7750,14 @@ define <16 x i32> @ugt_31_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_31_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_28_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_31_v16i32: +; BITALG-LABEL: ugt_28_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9523,13 +7770,13 @@ define <16 x i32> @ugt_31_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ult_32_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_32_v16i32: +define <16 x i32> @ult_29_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_29_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9562,7 +7809,7 @@ define <16 x i32> @ult_32_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_32_v16i32: +; AVX512BW-LABEL: ult_29_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9582,14 +7829,14 @@ define <16 x i32> @ult_32_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_32_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_29_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_32_v16i32: +; BITALG-LABEL: ult_29_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9602,13 +7849,13 @@ define <16 x i32> @ult_32_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> @ugt_32_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ugt_32_v16i32: +define <16 x i32> @ugt_29_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_29_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9641,7 +7888,7 @@ define <16 x i32> @ugt_32_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_32_v16i32: +; AVX512BW-LABEL: ugt_29_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9661,14 +7908,14 @@ define <16 x i32> @ugt_32_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_32_v16i32: +; AVX512VPOPCNTDQ-LABEL: ugt_29_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_32_v16i32: +; BITALG-LABEL: ugt_29_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9681,13 +7928,13 @@ define <16 x i32> @ugt_32_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ugt <16 x i32> %2, + %3 = icmp ugt <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <16 x i32> 
@ult_33_v16i32(<16 x i32> %0) { -; AVX512F-LABEL: ult_33_v16i32: +define <16 x i32> @ult_30_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_30_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9720,7 +7967,7 @@ define <16 x i32> @ult_33_v16i32(<16 x i32> %0) { ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_33_v16i32: +; AVX512BW-LABEL: ult_30_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9740,14 +7987,14 @@ define <16 x i32> @ult_33_v16i32(<16 x i32> %0) { ; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_33_v16i32: +; AVX512VPOPCNTDQ-LABEL: ult_30_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_33_v16i32: +; BITALG-LABEL: ult_30_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -9760,24 +8007,13 @@ define <16 x i32> @ult_33_v16i32(<16 x i32> %0) { ; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) - %3 = icmp ult <16 x i32> %2, + %3 = icmp ult <16 x i32> %2, %4 = sext <16 x i1> %3 to <16 x i32> ret <16 x i32> %4 } -define <8 x i64> @ult_0_v8i64(<8 x i64> %0) { -; AVX512-LABEL: ult_0_v8i64: -; AVX512: # %bb.0: -; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: retq - %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0) - %3 = icmp ult <8 x i64> %2, - %4 = sext <8 x i1> %3 to <8 x i64> - ret <8 x i64> %4 -} - -define <8 x i64> @ugt_0_v8i64(<8 x i64> %0) { -; AVX512F-LABEL: ugt_0_v8i64: +define <16 x i32> @ugt_30_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ugt_30_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9789,20 +8025,28 @@ define <8 x i64> @ugt_0_v8i64(<8 x i64> %0) { ; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] +; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 +; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] ; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 ; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 ; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 +; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] +; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = 
ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] ; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ugt_0_v8i64: +; AVX512BW-LABEL: ugt_30_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9813,34 +8057,42 @@ define <8 x i64> @ugt_0_v8i64(<8 x i64> %0) { ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 +; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ugt_0_v8i64: +; AVX512VPOPCNTDQ-LABEL: ugt_30_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vptestmq %zmm0, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 +; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ugt_0_v8i64: +; BITALG-LABEL: ugt_30_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 +; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vptestmq %zmm0, %zmm0, %k1 -; BITALG-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 +; BITALG-NEXT: vpcmpnleud {{.*}}(%rip){1to16}, %zmm0, %k1 +; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0) - %3 = icmp ugt <8 x i64> %2, - %4 = sext <8 x i1> %3 to <8 x i64> - ret <8 x i64> %4 + %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) + %3 = icmp ugt <16 x i32> %2, + %4 = sext <16 x i1> %3 to <16 x i32> + ret <16 x i32> %4 } -define <8 x i64> @ult_1_v8i64(<8 x i64> %0) { -; AVX512F-LABEL: 
ult_1_v8i64: +define <16 x i32> @ult_31_v16i32(<16 x i32> %0) { +; AVX512F-LABEL: ult_31_v16i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -9852,20 +8104,28 @@ define <8 x i64> @ult_1_v8i64(<8 x i64> %0) { ; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7] +; AVX512F-NEXT: vpsadbw %ymm3, %ymm5, %ymm5 +; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5] ; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 ; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 ; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 +; AVX512F-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7] +; AVX512F-NEXT: vpsadbw %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5] ; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: ult_1_v8i64: +; AVX512BW-LABEL: ult_31_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 @@ -9876,30 +8136,38 @@ define <8 x i64> @ult_1_v8i64(<8 x i64> %0) { ; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 +; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512VPOPCNTDQ-LABEL: ult_1_v8i64: +; AVX512VPOPCNTDQ-LABEL: ult_31_v16i32: ; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vptestnmq %zmm0, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 +; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 
{%k1} {z} ; AVX512VPOPCNTDQ-NEXT: retq ; -; BITALG-LABEL: ult_1_v8i64: +; BITALG-LABEL: ult_31_v16i32: ; BITALG: # %bb.0: ; BITALG-NEXT: vpopcntb %zmm0, %zmm0 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 +; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vptestnmq %zmm0, %zmm0, %k1 -; BITALG-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; BITALG-NEXT: vpackuswb %zmm2, %zmm0, %zmm0 +; BITALG-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1 +; BITALG-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; BITALG-NEXT: retq - %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0) - %3 = icmp ult <8 x i64> %2, - %4 = sext <8 x i1> %3 to <8 x i64> - ret <8 x i64> %4 + %2 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %0) + %3 = icmp ult <16 x i32> %2, + %4 = sext <16 x i1> %3 to <16 x i32> + ret <16 x i32> %4 } define <8 x i64> @ugt_1_v8i64(<8 x i64> %0) { @@ -17662,258 +15930,6 @@ define <8 x i64> @ult_63_v8i64(<8 x i64> %0) { ret <8 x i64> %4 } -define <8 x i64> @ugt_63_v8i64(<8 x i64> %0) { -; AVX512F-LABEL: ugt_63_v8i64: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpnleuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_63_v8i64: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, 
%zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_63_v8i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpnleuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ugt_63_v8i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; BITALG-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0) - %3 = icmp ugt <8 x i64> %2, - %4 = sext <8 x i1> %3 to <8 x i64> - ret <8 x i64> %4 -} - -define <8 x i64> @ult_64_v8i64(<8 x i64> %0) { -; AVX512F-LABEL: ult_64_v8i64: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_64_v8i64: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_64_v8i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ult_64_v8i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; 
BITALG-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0) - %3 = icmp ult <8 x i64> %2, - %4 = sext <8 x i1> %3 to <8 x i64> - ret <8 x i64> %4 -} - -define <8 x i64> @ugt_64_v8i64(<8 x i64> %0) { -; AVX512F-LABEL: ugt_64_v8i64: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpnleuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ugt_64_v8i64: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpnleuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ugt_64_v8i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpnleuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ugt_64_v8i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpnleuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; BITALG-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0) - %3 = icmp ugt <8 x i64> %2, - %4 = sext <8 x i1> %3 to <8 x i64> - ret <8 x i64> %4 -} - -define <8 x i64> @ult_65_v8i64(<8 x i64> %0) { -; AVX512F-LABEL: ult_65_v8i64: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 -; 
AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpsadbw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 -; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpaddb %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpsadbw %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: ult_65_v8i64: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VPOPCNTDQ-LABEL: ult_65_v8i64: -; AVX512VPOPCNTDQ: # %bb.0: -; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0 -; AVX512VPOPCNTDQ-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; AVX512VPOPCNTDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VPOPCNTDQ-NEXT: retq -; -; BITALG-LABEL: ult_65_v8i64: -; BITALG: # %bb.0: -; BITALG-NEXT: vpopcntb %zmm0, %zmm0 -; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 -; BITALG-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1 -; BITALG-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; BITALG-NEXT: retq - %2 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %0) - %3 = icmp ult <8 x i64> %2, - %4 = sext <8 x i1> %3 to <8 x i64> - ret <8 x i64> %4 -} - declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>) declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>) declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)