;
; AVX512-LABEL: test13:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512-NEXT: vpsubb %zmm2, %zmm0, %zmm1
-; AVX512-NEXT: vpcmpeqb %zmm2, %zmm0, %k1
-; AVX512-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
+; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512-NEXT: vpcmpneqb %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm1 {%k1}
; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT: retq
%1 = add <64 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
;
; AVX512-LABEL: test31:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512-NEXT: vpsubw %zmm2, %zmm0, %zmm1
-; AVX512-NEXT: vpcmpeqw %zmm2, %zmm0, %k1
-; AVX512-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1}
+; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512-NEXT: vpcmpneqw %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpsubw %zmm1, %zmm0, %zmm1 {%k1}
; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT: retq
%1 = add <32 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; AVX512-LABEL: select_sub:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
-; AVX512-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
-; AVX512-NEXT: vpsubq %zmm2, %zmm1, %zmm1
-; AVX512-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
+; AVX512-NEXT: vpsubq %zmm2, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
; AVX512-LABEL: select_add:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
-; AVX512-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
-; AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm1
-; AVX512-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
+; AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
; AVX512-LABEL: select_and:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
-; AVX512-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
-; AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; AVX512-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
+; AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
; AVX512-LABEL: select_xor:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
-; AVX512-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
-; AVX512-NEXT: vpxorq %zmm2, %zmm1, %zmm1
-; AVX512-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
+; AVX512-NEXT: vpxorq %zmm2, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
; AVX512-LABEL: select_shl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
-; AVX512-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
-; AVX512-NEXT: vpsllvq %zmm2, %zmm1, %zmm1
-; AVX512-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
+; AVX512-NEXT: vpsllvq %zmm2, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
; AVX512-LABEL: select_srl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
-; AVX512-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
-; AVX512-NEXT: vpsrlvq %zmm2, %zmm1, %zmm1
-; AVX512-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
+; AVX512-NEXT: vpsrlvq %zmm2, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
; AVX512-LABEL: select_sra:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm3
-; AVX512-NEXT: vptestmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
-; AVX512-NEXT: vpsravq %zmm2, %zmm1, %zmm1
-; AVX512-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
+; AVX512-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm3, %k1
+; AVX512-NEXT: vpsravq %zmm2, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i64>, ptr %ptr, i64 1
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512F-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
+; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
; AVX512F-NEXT: vpmulld %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: select_mul:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512VL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
-; AVX512VL-NEXT: vpmulld %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
+; AVX512VL-NEXT: vpmulld %ymm2, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512F-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
+; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
; AVX512F-NEXT: vpmaxsd %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: select_smax:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512VL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
-; AVX512VL-NEXT: vpmaxsd %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
+; AVX512VL-NEXT: vpmaxsd %ymm2, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512F-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
+; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
; AVX512F-NEXT: vpminsd %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: select_smin:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512VL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
-; AVX512VL-NEXT: vpminsd %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
+; AVX512VL-NEXT: vpminsd %ymm2, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512F-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
+; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
; AVX512F-NEXT: vpmaxud %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: select_umax:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512VL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
-; AVX512VL-NEXT: vpmaxud %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
+; AVX512VL-NEXT: vpmaxud %ymm2, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512F-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
+; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm3, %k1
; AVX512F-NEXT: vpminud %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: select_umin:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX512VL-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
-; AVX512VL-NEXT: vpminud %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %k1
+; AVX512VL-NEXT: vpminud %ymm2, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
entry:
%arrayidx = getelementptr inbounds <8 x i32>, ptr %ptr, i32 1
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512-NEXT: vpsubd %zmm2, %zmm1, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vpsubd %zmm2, %zmm1, %zmm1 {%k1}
+; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT: retq
%s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
%r = sub <16 x i32> %x, %s
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512-NEXT: vpsllvd %zmm2, %zmm1, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vpsllvd %zmm2, %zmm1, %zmm1 {%k1}
+; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT: retq
%s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
%r = shl <16 x i32> %x, %s
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512-NEXT: vpsrlvd %zmm2, %zmm1, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vpsrlvd %zmm2, %zmm1, %zmm1 {%k1}
+; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT: retq
%s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
%r = lshr <16 x i32> %x, %s
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512-NEXT: vpsravd %zmm2, %zmm1, %zmm0
-; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vpsravd %zmm2, %zmm1, %zmm1 {%k1}
+; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512-NEXT: retq
%s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
%r = ashr <16 x i32> %x, %s