;
; AVX-LABEL: allones_v4i32_sign:
; AVX: # %bb.0:
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: cmpb $15, %al
-; AVX-NEXT: sete %al
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vtestps %xmm1, %xmm0
+; AVX-NEXT: setb %al
; AVX-NEXT: retq
%tmp = icmp slt <4 x i32> %arg, zeroinitializer
%tmp1 = bitcast <4 x i1> %tmp to i4
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
-; AVX-LABEL: allones_v8i32_sign:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovmskps %ymm0, %eax
-; AVX-NEXT: cmpb $-1, %al
-; AVX-NEXT: sete %al
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX1-LABEL: allones_v8i32_sign:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vtestps %ymm1, %ymm0
+; AVX1-NEXT: setb %al
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: allones_v8i32_sign:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: setb %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: allones_v8i32_sign:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vtestps %ymm1, %ymm0
+; AVX512-NEXT: setb %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%tmp = icmp slt <8 x i32> %arg, zeroinitializer
%tmp1 = bitcast <8 x i1> %tmp to i8
%tmp2 = icmp eq i8 %tmp1, -1
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
-; AVX-LABEL: allones_v4i64_sign:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovmskpd %ymm0, %eax
-; AVX-NEXT: cmpb $15, %al
-; AVX-NEXT: sete %al
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX1-LABEL: allones_v4i64_sign:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vtestpd %ymm1, %ymm0
+; AVX1-NEXT: setb %al
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: allones_v4i64_sign:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestpd %ymm1, %ymm0
+; AVX2-NEXT: setb %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: allones_v4i64_sign:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vtestpd %ymm1, %ymm0
+; AVX512-NEXT: setb %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%tmp = icmp slt <4 x i64> %arg, zeroinitializer
%tmp1 = bitcast <4 x i1> %tmp to i4
%tmp2 = icmp eq i4 %tmp1, -1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: cmpl $15, %eax
-; AVX1-NEXT: sete %al
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestps %xmm1, %xmm0
+; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v8i64_sign:
; AVX2: # %bb.0:
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: cmpb $-1, %al
-; AVX2-NEXT: sete %al
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX1OR2-LABEL: allones_v4i32_and1:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
-; AVX1OR2-NEXT: cmpb $15, %al
-; AVX1OR2-NEXT: sete %al
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
+; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; KNL-LABEL: allones_v4i32_and1:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: cmpl $15, %eax
-; AVX1-NEXT: sete %al
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestps %xmm1, %xmm0
+; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v8i32_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: cmpb $-1, %al
-; AVX2-NEXT: sete %al
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX1OR2-LABEL: allones_v2i64_and1:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT: cmpb $3, %al
-; AVX1OR2-NEXT: sete %al
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
+; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; KNL-LABEL: allones_v2i64_and1:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskpd %xmm0, %eax
-; AVX1-NEXT: cmpl $3, %eax
-; AVX1-NEXT: sete %al
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestpd %xmm1, %xmm0
+; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v4i64_and1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskpd %ymm0, %eax
-; AVX2-NEXT: cmpb $15, %al
-; AVX2-NEXT: sete %al
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestpd %ymm1, %ymm0
+; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: cmpl $15, %eax
-; AVX1-NEXT: sete %al
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestps %xmm1, %xmm0
+; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: cmpb $-1, %al
-; AVX2-NEXT: sete %al
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX1OR2-LABEL: allones_v4i32_and4:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpslld $29, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
-; AVX1OR2-NEXT: cmpb $15, %al
-; AVX1OR2-NEXT: sete %al
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
+; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; KNL-LABEL: allones_v4i32_and4:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: cmpl $15, %eax
-; AVX1-NEXT: sete %al
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestps %xmm1, %xmm0
+; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v8i32_and4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: cmpb $-1, %al
-; AVX2-NEXT: sete %al
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX1OR2-LABEL: allones_v2i64_and4:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpsllq $61, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT: cmpb $3, %al
-; AVX1OR2-NEXT: sete %al
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
+; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; KNL-LABEL: allones_v2i64_and4:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskpd %xmm0, %eax
-; AVX1-NEXT: cmpl $3, %eax
-; AVX1-NEXT: sete %al
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestpd %xmm1, %xmm0
+; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allones_v4i64_and4:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskpd %ymm0, %eax
-; AVX2-NEXT: cmpb $15, %al
-; AVX2-NEXT: sete %al
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestpd %ymm1, %ymm0
+; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: cmpl $15, %eax
-; AVX1-NEXT: sete %al
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestps %xmm1, %xmm0
+; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: cmpb $-1, %al
-; AVX2-NEXT: sete %al
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX1OR2-LABEL: movmsk_and_v2f64:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
-; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT: cmpb $3, %al
-; AVX1OR2-NEXT: sete %al
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
+; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; KNL-LABEL: movmsk_and_v2f64:
; AVX1OR2-LABEL: PR39665_c_ray_opt:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT: cmpb $3, %al
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
; AVX1OR2-NEXT: movl $42, %ecx
; AVX1OR2-NEXT: movl $99, %eax
-; AVX1OR2-NEXT: cmovel %ecx, %eax
+; AVX1OR2-NEXT: cmovbl %ecx, %eax
; AVX1OR2-NEXT: retq
;
; KNL-LABEL: PR39665_c_ray_opt:
; AVX-LABEL: test_v2f64_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovmskpd %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: cmpl $3, %ecx
-; AVX-NEXT: sete %al
-; AVX-NEXT: negq %rax
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vtestpd %xmm1, %xmm0
+; AVX-NEXT: sbbq %rax, %rax
; AVX-NEXT: retq
%c = fcmp ogt <2 x double> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
; SSE-NEXT: negq %rax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_v4f64_sext:
-; AVX: # %bb.0:
-; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovmskpd %ymm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: cmpl $15, %ecx
-; AVX-NEXT: sete %al
-; AVX-NEXT: negq %rax
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX1-LABEL: test_v4f64_sext:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vtestpd %ymm1, %ymm0
+; AVX1-NEXT: sbbq %rax, %rax
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v4f64_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestpd %ymm1, %ymm0
+; AVX2-NEXT: sbbq %rax, %rax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_v4f64_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vtestpd %ymm1, %ymm0
+; AVX512-NEXT: sbbq %rax, %rax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
%s = sext <4 x i1> %c to <4 x i64>
%1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; AVX1OR2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskps %xmm0, %ecx
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1OR2-NEXT: xorl %eax, %eax
-; AVX1OR2-NEXT: cmpl $15, %ecx
-; AVX1OR2-NEXT: sete %al
-; AVX1OR2-NEXT: negq %rax
+; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
+; AVX1OR2-NEXT: sbbq %rax, %rax
; AVX1OR2-NEXT: vzeroupper
; AVX1OR2-NEXT: retq
;
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z}
; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: cmpl $15, %ecx
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: negq %rax
+; AVX512-NEXT: vtestps %xmm0, %xmm1
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
; AVX-LABEL: test_v4f32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: cmpl $15, %ecx
-; AVX-NEXT: sete %al
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vtestps %xmm1, %xmm0
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
%c = fcmp ogt <4 x float> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
; SSE-NEXT: negl %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_v8f32_sext:
-; AVX: # %bb.0:
-; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovmskps %ymm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: cmpl $255, %ecx
-; AVX-NEXT: sete %al
-; AVX-NEXT: negl %eax
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX1-LABEL: test_v8f32_sext:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: vtestps %ymm1, %ymm0
+; AVX1-NEXT: sbbl %eax, %eax
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_v8f32_sext:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: sbbl %eax, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_v8f32_sext:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vtestps %ymm1, %ymm0
+; AVX512-NEXT: sbbl %eax, %eax
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
%s = sext <8 x i1> %c to <8 x i32>
%1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-LABEL: test_v2i64_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovmskpd %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: cmpl $3, %ecx
-; AVX-NEXT: sete %al
-; AVX-NEXT: negq %rax
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vtestpd %xmm1, %xmm0
+; AVX-NEXT: sbbq %rax, %rax
; AVX-NEXT: retq
%c = icmp sgt <2 x i64> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskpd %xmm0, %ecx
; AVX1-NEXT: xorl %eax, %eax
-; AVX1-NEXT: cmpl $3, %ecx
-; AVX1-NEXT: sete %al
-; AVX1-NEXT: negq %rax
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestpd %xmm1, %xmm0
+; AVX1-NEXT: sbbq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskpd %ymm0, %ecx
; AVX2-NEXT: xorl %eax, %eax
-; AVX2-NEXT: cmpl $15, %ecx
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: negq %rax
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestpd %ymm1, %ymm0
+; AVX2-NEXT: sbbq %rax, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vmovmskpd %ymm0, %ecx
; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: cmpl $15, %ecx
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: negq %rax
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vtestpd %ymm1, %ymm0
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %ecx
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: xorl %eax, %eax
-; AVX1-NEXT: cmpl $15, %ecx
-; AVX1-NEXT: sete %al
-; AVX1-NEXT: negq %rax
+; AVX1-NEXT: vtestps %xmm1, %xmm0
+; AVX1-NEXT: sbbq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovmskps %xmm0, %ecx
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: xorl %eax, %eax
-; AVX2-NEXT: cmpl $15, %ecx
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: negq %rax
+; AVX2-NEXT: vtestps %xmm1, %xmm0
+; AVX2-NEXT: sbbq %rax, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z}
; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: cmpl $15, %ecx
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: negq %rax
+; AVX512-NEXT: vtestps %xmm0, %xmm1
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
; AVX-LABEL: test_v4i32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %ecx
; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: cmpl $15, %ecx
-; AVX-NEXT: sete %al
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vtestps %xmm1, %xmm0
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
%c = icmp sgt <4 x i32> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %ecx
; AVX1-NEXT: xorl %eax, %eax
-; AVX1-NEXT: cmpl $15, %ecx
-; AVX1-NEXT: sete %al
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestps %xmm1, %xmm0
+; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %ecx
; AVX2-NEXT: xorl %eax, %eax
-; AVX2-NEXT: cmpl $255, %ecx
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vmovmskps %ymm0, %ecx
; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: cmpl $255, %ecx
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: negl %eax
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vtestps %ymm1, %ymm0
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
; AVX1OR2-LABEL: bool_reduction_v2f64:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT: cmpb $3, %al
-; AVX1OR2-NEXT: sete %al
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0
+; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: bool_reduction_v2f64:
; AVX1OR2-LABEL: bool_reduction_v4f32:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
-; AVX1OR2-NEXT: cmpb $15, %al
-; AVX1OR2-NEXT: sete %al
+; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT: vtestps %xmm1, %xmm0
+; AVX1OR2-NEXT: setb %al
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: bool_reduction_v4f32:
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: bool_reduction_v4f64:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
-; AVX1OR2-NEXT: vmovmskpd %ymm0, %eax
-; AVX1OR2-NEXT: cmpb $15, %al
-; AVX1OR2-NEXT: sete %al
-; AVX1OR2-NEXT: vzeroupper
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: bool_reduction_v4f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vtestpd %ymm1, %ymm0
+; AVX1-NEXT: setb %al
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: bool_reduction_v4f64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestpd %ymm1, %ymm0
+; AVX2-NEXT: setb %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
;
; AVX512-LABEL: bool_reduction_v4f64:
; AVX512: # %bb.0:
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: bool_reduction_v8f32:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
-; AVX1OR2-NEXT: vmovmskps %ymm0, %eax
-; AVX1OR2-NEXT: cmpb $-1, %al
-; AVX1OR2-NEXT: sete %al
-; AVX1OR2-NEXT: vzeroupper
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: bool_reduction_v8f32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vtestps %ymm1, %ymm0
+; AVX1-NEXT: setb %al
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: bool_reduction_v8f32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: setb %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
;
; AVX512-LABEL: bool_reduction_v8f32:
; AVX512: # %bb.0:
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskpd %xmm0, %eax
-; AVX1-NEXT: cmpl $3, %eax
-; AVX1-NEXT: sete %al
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vtestpd %xmm1, %xmm0
+; AVX1-NEXT: setb %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: bool_reduction_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vmovmskpd %ymm0, %eax
-; AVX2-NEXT: cmpb $15, %al
-; AVX2-NEXT: sete %al
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vtestpd %ymm1, %ymm0
+; AVX2-NEXT: setb %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;