break;
}
case ISD::VSELECT: {
- // Replace VSELECT with non-mask conditions with with BLENDV.
- if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+ // Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG.
+ EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
+ if (EleVT == MVT::i1)
break;
assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
assert(N->getValueType(0).getVectorElementType() != MVT::i16 &&
"We can't replace VSELECT with BLENDV in vXi16!");
- SDValue Blendv =
- CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1), N->getOperand(2));
+ SDValue R;
+ if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) ==
+ EleVT.getSizeInBits()) {
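+ // Imm 0xCA computes (Op0 & Op1) | (~Op0 & Op2): a bitwise select of
+ // Op1/Op2 on the all-sign-bits condition.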
+ R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8));
+ } else {
+ R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1),
+ N->getOperand(2));
+ }
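+ // ReplaceAllUsesWith can CSE away the node the iterator references, so
+ // park the iterator on N (which survives the call) while replacing.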
--I;
- CurDAG->ReplaceAllUsesWith(N, Blendv.getNode());
+ CurDAG->ReplaceAllUsesWith(N, R.getNode());
++I;
MadeChange = true;
continue;
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
-; X86NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
+; X86NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
-; X64NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
+; X64NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
-; X86NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
+; X86NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
-; X64NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
+; X64NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
-; X86NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
+; X86NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd]
; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc]
-; X64NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50]
+; X64NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8]
; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
-; X86NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
+; X86NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed]
; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01]
; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc]
-; X64NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50]
+; X64NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8]
; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
; X86NOBW-LABEL: test_vgf2p8mulb_128_mask:
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
-; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
-; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
-; X86NOBW-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 # encoding: [0xc4,0xe3,0x69,0x4c,0xc0,0x10]
+; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
+; X86NOBW-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca]
; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_128_mask:
; X64NOBW: # %bb.0:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1]
-; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
-; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
-; X64NOBW-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 # encoding: [0xc4,0xe3,0x69,0x4c,0xc0,0x10]
+; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
+; X64NOBW-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca]
; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i16 %mask to <16 x i1>
; X86NOBW: # %bb.0:
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06]
-; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
-; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
-; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9]
+; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
+; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
-; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcb,0x01]
-; X86NOBW-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 # encoding: [0xc4,0xe3,0x6d,0x4c,0xc0,0x10]
+; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
+; X86NOBW-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca]
; X86NOBW-NEXT: retl # encoding: [0xc3]
;
; X64NOBW-LABEL: test_vgf2p8mulb_256_mask:
; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10]
; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
-; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1]
-; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff]
-; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9]
+; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9]
+; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
+; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff]
; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb]
-; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcb,0x01]
-; X64NOBW-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 # encoding: [0xc4,0xe3,0x6d,0x4c,0xc0,0x10]
+; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01]
+; X64NOBW-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca]
; X64NOBW-NEXT: retq # encoding: [0xc3]
%1 = bitcast i32 %mask to <32 x i1>
%2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2)
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
+; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2
-; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3
+; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLDQ-NEXT: vpternlogq $202, %ymm2, %ymm3, %ymm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: var_shuffle_v16i16:
; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3
+; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: var_shuffle_v32i8:
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3
+; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: var_shuffle_v16i16_from_v8i16:
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2
; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3
+; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0
; AVX512VLDQ-NEXT: retq
;
; AVX512VLBW-LABEL: var_shuffle_v32i8_from_v16i8:
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8:
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0
+; AVX512VL-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8_split:
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vpternlogq $226, %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovq %xmm0, (%rdi)
; AVX512-NEXT: vmovq %xmm1, (%rsi)
; AVX512-NEXT: vzeroupper
; Sorry 16-bit, you're not important enough to support?
define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
-; AVX-LABEL: signbit_sel_v8i16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
-; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX12-LABEL: signbit_sel_v8i16:
+; AVX12: # %bb.0:
+; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX12-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX12-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX12-NEXT: retq
+;
+; AVX512F-LABEL: signbit_sel_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX512F-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: signbit_sel_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
+; AVX512VL-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v8i16:
; XOP: # %bb.0:
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: signbit_sel_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
-; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: signbit_sel_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: signbit_sel_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v16i16:
; XOP: # %bb.0:
; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: blend_splat1_mask_cond_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: blend_splat1_mask_cond_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_splat1_mask_cond_v16i16:
; XOP: # %bb.0:
; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT: retq
;
-; AVX512-LABEL: blend_splat1_mask_cond_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: blend_splat1_mask_cond_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_splat1_mask_cond_v16i8:
; XOP: # %bb.0:
; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT: retq
;
-; AVX512-LABEL: blend_splatmax_mask_cond_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: blend_splatmax_mask_cond_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
; XOP: # %bb.0:
; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: blend_splatmax_mask_cond_v32i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
; XOP: # %bb.0:
; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: blend_splat_mask_cond_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: blend_splat_mask_cond_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_splat_mask_cond_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_splat_mask_cond_v16i16:
; XOP: # %bb.0:
; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT: retq
;
-; AVX512-LABEL: blend_splat_mask_cond_v16i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
-; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: blend_splat_mask_cond_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_splat_mask_cond_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_splat_mask_cond_v16i8:
; XOP: # %bb.0:
}
define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
-; AVX-LABEL: blend_mask_cond_v8i16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
-; AVX-NEXT: retq
+; AVX12-LABEL: blend_mask_cond_v8i16:
+; AVX12: # %bb.0:
+; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX12-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
+; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; AVX12-NEXT: retq
+;
+; AVX512F-LABEL: blend_mask_cond_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_mask_cond_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_mask_cond_v8i16:
; XOP: # %bb.0:
}
define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
-; AVX-LABEL: blend_mask_cond_v16i8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
-; AVX-NEXT: retq
+; AVX12-LABEL: blend_mask_cond_v16i8:
+; AVX12: # %bb.0:
+; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX12-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; AVX12-NEXT: retq
+;
+; AVX512F-LABEL: blend_mask_cond_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_mask_cond_v16i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_mask_cond_v16i8:
; XOP: # %bb.0:
; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: blend_mask_cond_v16i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: blend_mask_cond_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_mask_cond_v16i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_mask_cond_v16i16:
; XOP: # %bb.0:
; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: blend_mask_cond_v32i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
-; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: blend_mask_cond_v32i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: blend_mask_cond_v32i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
;
; XOP-LABEL: blend_mask_cond_v32i8:
; XOP: # %bb.0:
}
attributes #0 = { "no-nans-fp-math"="true" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}