--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
+
+; abs of a <4 x i8> vector (second llvm.abs operand is true) compared for
+; equality with a splat of 129; the <4 x i1> result is returned directly.
+; Every run configuration expects the abs to stay in place (pabsb / vpabsb, or
+; the psubb + pminub expansion in the sse2 run) followed by one byte compare
+; against a constant-pool operand.
+; NOTE(review): 129 is outside the signed i8 range (bit pattern 0x81) --
+; presumably deliberate for this "unchanged" case; confirm.
+define <4 x i1> @illegal_abs_unchanged(<4 x i8> %x) {
+; AVX512-LABEL: illegal_abs_unchanged:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsb %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_unchanged:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsb %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_unchanged:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsb %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_unchanged:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: pminub %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: retq
+ %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true)
+ %cmp = icmp eq <4 x i8> %abs, <i8 129, i8 129, i8 129, i8 129>
+ ret <4 x i1> %cmp
+}
+
+; Inequality counterpart of the <4 x i8> abs-vs-129 test: icmp ne instead of
+; icmp eq. The expected code keeps the abs and the equality byte compare, then
+; inverts the compare result (vpternlogq $15 in the avx512 run, an all-ones
+; register from pcmpeqd plus pxor in the other runs).
+; NOTE(review): 129 is outside the signed i8 range (bit pattern 0x81) --
+; presumably deliberate for this "unchanged" case; confirm.
+define <4 x i1> @illegal_abs_unchanged2(<4 x i8> %x) {
+; AVX512-LABEL: illegal_abs_unchanged2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsb %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_unchanged2:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsb %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_unchanged2:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsb %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_unchanged2:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: pminub %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: retq
+ %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true)
+ %cmp = icmp ne <4 x i8> %abs, <i8 129, i8 129, i8 129, i8 129>
+ ret <4 x i1> %cmp
+}
+
+; abs of a <4 x i64> vector compared for equality with a splat of 129,
+; returning <4 x i1>. Only the avx512 run uses a native vpabsq; the other runs
+; expand the abs (subtract-from-zero plus blendvpd for avx2/sse4.1, a
+; sign-mask xor/subtract sequence for sse2) before the compare.
+; NOTE(review): the name suggests a fold of abs(x)==C into (x==C)|(x==-C);
+; the assertions recorded here still compute the abs -- presumably a baseline
+; for that fold; confirm against the accompanying change.
+define <4 x i1> @illegal_abs_to_eq_or(<4 x i64> %x) {
+; AVX512-LABEL: illegal_abs_to_eq_or:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_to_eq_or:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_to_eq_or:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: psubq %xmm0, %xmm4
+; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
+; SSE41-NEXT: psubq %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: packssdw %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_to_eq_or:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: packssdw %xmm3, %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
+ %cmp = icmp eq <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
+ ret <4 x i1> %cmp
+}
+
+; Same abs(<4 x i64>) == splat(129) comparison, but the <4 x i1> result is
+; sign-extended back to <4 x i64> before being returned, so the expected code
+; ends at the full-width compare result with no pack/narrowing step.
+; NOTE(review): the assertions still compute the abs in every run --
+; presumably a baseline for an eq/or fold; confirm.
+define <4 x i64> @illegal_abs_to_eq_or_sext(<4 x i64> %x) {
+; AVX512-LABEL: illegal_abs_to_eq_or_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_to_eq_or_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_to_eq_or_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: psubq %xmm1, %xmm4
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
+; SSE41-NEXT: psubq %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_to_eq_or_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: retq
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
+ %cmp = icmp eq <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
+ %r = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %r
+}
+
+; abs of a <4 x i64> vector compared for inequality with a splat of 129,
+; returning <4 x i1>. The avx512 run uses vpabsq plus a masked vpcmpneqq; the
+; other runs expand the abs, do an equality compare and invert it with an
+; all-ones xor before narrowing the result with packssdw.
+; NOTE(review): the name suggests a fold of abs(x)!=C into (x!=C)&(x!=-C);
+; the assertions recorded here still compute the abs -- presumably a baseline
+; for that fold; confirm.
+define <4 x i1> @illegal_abs_to_ne_and(<4 x i64> %x) {
+; AVX512-LABEL: illegal_abs_to_ne_and:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_to_ne_and:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_to_ne_and:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: psubq %xmm0, %xmm4
+; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
+; SSE41-NEXT: psubq %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm2
+; SSE41-NEXT: packssdw %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_to_ne_and:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: packssdw %xmm3, %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
+ %cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
+ ret <4 x i1> %cmp
+}
+
+; Same abs(<4 x i64>) != splat(129) comparison, with the <4 x i1> result
+; sign-extended to <4 x i64>. The expected code is the abs, an equality
+; compare, and an inversion of the full-width result (vpternlogq $15 in the
+; avx512 run, all-ones xor elsewhere); no narrowing step is emitted.
+; NOTE(review): the assertions still compute the abs in every run --
+; presumably a baseline for a ne/and fold; confirm.
+define <4 x i64> @illegal_abs_to_ne_and_sext(<4 x i64> %x) {
+; AVX512-LABEL: illegal_abs_to_ne_and_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsq %ymm0, %ymm0
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: illegal_abs_to_ne_and_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: illegal_abs_to_ne_and_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: psubq %xmm1, %xmm4
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
+; SSE41-NEXT: psubq %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: pxor %xmm3, %xmm2
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: illegal_abs_to_ne_and_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm3, %xmm1
+; SSE2-NEXT: retq
+ %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true)
+ %cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129>
+ %r = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %r
+}
+
+; abs of a <4 x i32> vector compared for equality with a splat of 129,
+; returning <4 x i1>. The expected code is a single abs (vpabsd / pabsd, or the
+; psrad + pxor + psubd expansion in the sse2 run) followed by one pcmpeqd; as
+; the name indicates, the abs + compare shape is expected to be left as-is.
+define <4 x i1> @legal_abs_eq_unchanged(<4 x i32> %x) {
+; AVX512-LABEL: legal_abs_eq_unchanged:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: legal_abs_eq_unchanged:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: legal_abs_eq_unchanged:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsd %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: legal_abs_eq_unchanged:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
+ %cmp = icmp eq <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
+ ret <4 x i1> %cmp
+}
+
+; Same abs(<4 x i32>) == splat(129) comparison, with the <4 x i1> result
+; sign-extended to <4 x i32>. The recorded assertions are instruction-for-
+; instruction the same as for the non-sext variant: abs followed by pcmpeqd.
+define <4 x i32> @legal_abs_eq_unchanged_sext(<4 x i32> %x) {
+; AVX512-LABEL: legal_abs_eq_unchanged_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: legal_abs_eq_unchanged_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: legal_abs_eq_unchanged_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsd %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: legal_abs_eq_unchanged_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
+ %cmp = icmp eq <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
+ %r = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; abs of a <4 x i32> vector compared for inequality with a splat of 129,
+; returning <4 x i1>. The expected code is the abs, an equality pcmpeqd, and an
+; inversion of the result (vpternlogq $15 in the avx512 run, all-ones xor in
+; the other runs).
+; NOTE(review): the trailing "dd" in the function name looks like a typo of
+; "unchanged"; it is kept because the LABEL assertions reference the symbol.
+define <4 x i1> @legal_abs_ne_unchangedd(<4 x i32> %x) {
+; AVX512-LABEL: legal_abs_ne_unchangedd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: legal_abs_ne_unchangedd:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: legal_abs_ne_unchangedd:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: legal_abs_ne_unchangedd:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
+ %cmp = icmp ne <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
+ ret <4 x i1> %cmp
+}
+
+; Same abs(<4 x i32>) != splat(129) comparison, with the <4 x i1> result
+; sign-extended to <4 x i32>. The recorded assertions match the non-sext
+; variant: abs, equality pcmpeqd, then an inversion of the result.
+; NOTE(review): the "dd" in the function name looks like a typo of
+; "unchanged"; it is kept because the LABEL assertions reference the symbol.
+define <4 x i32> @legal_abs_ne_unchangedd_sext(<4 x i32> %x) {
+; AVX512-LABEL: legal_abs_ne_unchangedd_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpabsd %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: legal_abs_ne_unchangedd_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpabsd %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: legal_abs_ne_unchangedd_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pabsd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: legal_abs_ne_unchangedd_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
+ %cmp = icmp ne <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129>
+ %r = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; (x == 129) | (x == -129) on a <4 x i64> vector, returning <4 x i1>.
+; Every run configuration expects two separate equality compares whose results
+; are OR-ed (korw, vpor or orps). The constant 18446744073709551487 in the
+; assertions is -129 printed as an unsigned 64-bit value.
+; NOTE(review): the name suggests a fold of this pattern into an abs-based
+; compare; the assertions recorded here do not use an abs -- presumably a
+; baseline or a case where the fold is not applied; confirm.
+define <4 x i1> @eq_or_to_abs_vec4x64(<4 x i64> %x) {
+; AVX512-LABEL: eq_or_to_abs_vec4x64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
+; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm3, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: orps %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm3, %xmm0
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
+ %cmp2 = icmp eq <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
+ %cmp = or <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %cmp
+}
+
+; (x == 129) | (x == -129) on a <4 x i64> vector, with the <4 x i1> result
+; sign-extended to <4 x i64>. The avx512 and avx2 runs expect two compares and
+; an OR at full width; the sse4.1 and sse2 runs additionally expect the OR-ed
+; 32-bit-lane result to be widened back to 64-bit lanes after the orps.
+; The constant 18446744073709551487 is -129 printed as unsigned 64-bit.
+define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
+; AVX512-LABEL: eq_or_to_abs_vec4x64_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0
+; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x64_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
+; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x64_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm3, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: orps %xmm2, %xmm0
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE41-NEXT: psllq $63, %xmm0
+; SSE41-NEXT: psrad $31, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x64_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm3, %xmm0
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: psllq $63, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
+ %cmp2 = icmp eq <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
+ %cmp = or <4 x i1> %cmp1, %cmp2
+ %r = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %r
+}
+
+; (x != 129) & (x != -129) on a <4 x i64> vector, returning <4 x i1>.
+; The avx512 run expects two vpcmpneqq, the second masked by the first; the
+; other runs expect two equality compares combined with an inversion and an
+; and-not (vpandn / andnps). The constant 18446744073709551487 is -129
+; printed as unsigned 64-bit.
+; NOTE(review): the name suggests a fold of this pattern into an abs-based
+; compare; the assertions recorded here do not use an abs; confirm intent.
+define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
+; AVX512-LABEL: ne_and_to_abs_vec4x64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 {%k1}
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
+; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x64:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: xorps %xmm3, %xmm0
+; SSE41-NEXT: andnps %xmm0, %xmm2
+; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm0
+; SSE2-NEXT: xorps %xmm3, %xmm0
+; SSE2-NEXT: andnps %xmm0, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
+ %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
+ %cmp = and <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %cmp
+}
+
+; (x != 129) & (x != -129) on a <4 x i64> vector, with the <4 x i1> result
+; sign-extended to <4 x i64>. The avx512 and avx2 runs expect the two compares
+; to be combined at full width; the sse4.1 and sse2 runs additionally expect
+; the combined 32-bit-lane result to be widened back to 64-bit lanes after
+; the andnps. The constant 18446744073709551487 is -129 printed as unsigned
+; 64-bit.
+define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
+; AVX512-LABEL: ne_and_to_abs_vec4x64_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
+; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 {%k1}
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x64_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129]
+; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
+; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x64_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE41-NEXT: xorps %xmm3, %xmm0
+; SSE41-NEXT: andnps %xmm0, %xmm2
+; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
+; SSE41-NEXT: psllq $63, %xmm1
+; SSE41-NEXT: psrad $31, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x64_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: xorps %xmm3, %xmm2
+; SSE2-NEXT: andnps %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: psllq $63, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
+ %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
+ %cmp = and <4 x i1> %cmp1, %cmp2
+ %r = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %r
+}
+
+; (x == 1) | (x == -1) on a <4 x i32> vector, returning <4 x i1>.
+; Every run configuration expects two equality compares whose results are
+; OR-ed; the -1 splat is materialized as an all-ones register via pcmpeqd of a
+; register with itself rather than loaded from memory.
+; NOTE(review): the name suggests a fold of this pattern into an abs-based
+; compare; the assertions recorded here do not use an abs; confirm intent.
+define <4 x i1> @eq_or_to_abs_vec4x32(<4 x i32> %x) {
+; AVX512-LABEL: eq_or_to_abs_vec4x32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %cmp2 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %cmp = or <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %cmp
+}
+
+; (x == 1) | (x == -1) on a <4 x i32> vector, with the <4 x i1> result
+; sign-extended to <4 x i32>. The recorded assertions are instruction-for-
+; instruction the same as for the non-sext variant: two equality compares
+; (the -1 splat built with pcmpeqd reg,reg) followed by an OR.
+define <4 x i32> @eq_or_to_abs_vec4x32_sext(<4 x i32> %x) {
+; AVX512-LABEL: eq_or_to_abs_vec4x32_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x32_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x32_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x32_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %cmp2 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %cmp = or <4 x i1> %cmp1, %cmp2
+ %r = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; Per-lane (x != 1) & (x != -1) on <4 x i32>, returned as a <4 x i1> mask.
+; This is the inverted form of the eq/or pattern: arithmetically it equals
+; abs(x) != 1, and the function name suggests it is a candidate for that fold
+; (presumably - confirm against the accompanying patch). The assertions below
+; currently show two compares: chained masked not-equal compares in the
+; AVX-512 run, and equality compares combined via xor/pandn in the others.
+; They are autogenerated by utils/update_llc_test_checks.py.
+define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) {
+; AVX512-LABEL: ne_and_to_abs_vec4x32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 {%k1}
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x32:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %cmp = and <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %cmp
+}
+
+; Per-lane (x != 1) & (x != -1) on <4 x i32>, with the <4 x i1> result
+; sign-extended to <4 x i32> (all-ones / all-zeros lanes). Arithmetically
+; this equals abs(x) != 1; the function name suggests it is a candidate for
+; that fold (presumably - confirm against the accompanying patch). The
+; assertions below currently show two compares in every RUN configuration.
+; They are autogenerated by utils/update_llc_test_checks.py.
+define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) {
+; AVX512-LABEL: ne_and_to_abs_vec4x32_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1
+; AVX512-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 {%k1}
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x32_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x32_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x32_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %cmp = and <4 x i1> %cmp1, %cmp2
+ %r = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; Per-lane (x == 88) | (x == -88) on the narrow <4 x i16> type, returned as a
+; <4 x i1> mask. Arithmetically this equals abs(x) == 88; the function name
+; suggests it is a candidate for that fold (presumably - confirm against the
+; accompanying patch). The assertions below currently show two word compares
+; whose results are each widened to 32-bit lanes before being OR-ed together.
+; They are autogenerated by utils/update_llc_test_checks.py.
+define <4 x i1> @eq_or_to_abs_vec4x16(<4 x i16> %x) {
+; AVX512-LABEL: eq_or_to_abs_vec4x16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX512-NEXT: vptestmd %ymm1, %ymm1, %k0
+; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpmovsxwd %xmm1, %xmm1
+; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x16:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
+; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
+; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
+; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
+; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88>
+ %cmp2 = icmp eq <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88>
+ %cmp = or <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %cmp
+}
+
+; Per-lane (x == 88) | (x == -88) on the narrow <4 x i8> type, with the
+; <4 x i1> result sign-extended back to <4 x i8>. Arithmetically this equals
+; abs(x) == 88; the function name suggests it is a candidate for that fold
+; (presumably - confirm against the accompanying patch). The assertions below
+; currently show two byte compares joined by an OR; the AVX-512 run also
+; round-trips through 512-bit mask tests before truncating back to bytes.
+; They are autogenerated by utils/update_llc_test_checks.py.
+define <4 x i8> @eq_or_to_abs_vec4x8_sext(<4 x i8> %x) {
+; AVX512-LABEL: eq_or_to_abs_vec4x8_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: korw %k1, %k0, %k1
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: eq_or_to_abs_vec4x8_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: eq_or_to_abs_vec4x8_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: eq_or_to_abs_vec4x8_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp eq <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88>
+ %cmp2 = icmp eq <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88>
+ %cmp = or <4 x i1> %cmp1, %cmp2
+ %r = sext <4 x i1> %cmp to <4 x i8>
+ ret <4 x i8> %r
+}
+
+; Per-lane (x != 88) & (x != -88) on the narrow <4 x i8> type, returned as a
+; <4 x i1> mask. Arithmetically this equals abs(x) != 88; the function name
+; suggests it is a candidate for that fold (presumably - confirm against the
+; accompanying patch). The assertions below currently show two byte equality
+; compares, each inverted and widened to 32-bit lanes, then AND-ed together.
+; They are autogenerated by utils/update_llc_test_checks.py.
+define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
+; AVX512-LABEL: ne_and_to_abs_vec4x8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: kandw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1
+; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: pmovsxbd %xmm1, %xmm1
+; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
+; SSE41-NEXT: pand %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: psrad $24, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88>
+ %cmp2 = icmp ne <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88>
+ %cmp = and <4 x i1> %cmp1, %cmp2
+ ret <4 x i1> %cmp
+}
+
+; Per-lane (x != 88) & (x != -88) on the narrow <4 x i16> type, with the
+; <4 x i1> result sign-extended back to <4 x i16>. Arithmetically this equals
+; abs(x) != 88; the function name suggests it is a candidate for that fold
+; (presumably - confirm against the accompanying patch). The assertions below
+; currently show two word equality compares combined via xor/pandn, except
+; the AVX-512 run, which inverts, widens and mask-tests each compare before
+; AND-ing the masks and truncating back to words.
+; They are autogenerated by utils/update_llc_test_checks.py.
+define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
+; AVX512-LABEL: ne_and_to_abs_vec4x16_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX512-NEXT: vptestmd %ymm1, %ymm1, %k0
+; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1
+; AVX512-NEXT: kandw %k1, %k0, %k1
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: ne_and_to_abs_vec4x16_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; SSE41-LABEL: ne_and_to_abs_vec4x16_sext:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
+; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE2-LABEL: ne_and_to_abs_vec4x16_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
+; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+ %cmp1 = icmp ne <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88>
+ %cmp2 = icmp ne <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88>
+ %cmp = and <4 x i1> %cmp1, %cmp2
+ %r = sext <4 x i1> %cmp to <4 x i16>
+ ret <4 x i16> %r
+}