; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $3, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: retq
%c = fcmp ogt <2 x double> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cltq
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: retq
%c = fcmp ogt <4 x float> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $255, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cwtl
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $3, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: retq
%c = icmp sgt <2 x i64> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cltq
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negq %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $15, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $255, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cwtl
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%c = icmp sgt <8 x i16> %a0, %a1
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpl $-1, %ecx
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negl %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negb %al
; AVX512-NEXT: movsbl %al, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negb %al
; AVX512-NEXT: retq
%c = icmp sgt <16 x i8> %a0, %a1
%s = sext <16 x i1> %c to <16 x i8>
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-NEXT: cmpl $-1, %eax
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: negb %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <32 x i8> %a0, %a1
; AVX512-LABEL: test_v2f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: retq
%c = fcmp ogt <2 x double> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
; AVX512-LABEL: test_v4f64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cltq
+; AVX512-NEXT: vmovmskps %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <4 x double> %a0, %a1
; AVX512-LABEL: test_v4f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: retq
%c = fcmp ogt <4 x float> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
; AVX512-LABEL: test_v8f32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cwtl
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = fcmp ogt <8 x float> %a0, %a1
; AVX512-LABEL: test_v2i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: retq
%c = icmp sgt <2 x i64> %a0, %a1
%s = sext <2 x i1> %c to <2 x i64>
; AVX512-LABEL: test_v4i64_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vmovmskpd %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cltq
+; AVX512-NEXT: vmovmskps %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbq %rax, %rax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <4 x i64> %a0, %a1
; AVX512-LABEL: test_v4i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a0, %a1
%s = sext <4 x i1> %c to <4 x i32>
; AVX512-LABEL: test_v8i32_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vmovmskps %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
-; AVX512-NEXT: cwtl
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%c = icmp sgt <8 x i32> %a0, %a1
; AVX512-LABEL: test_v8i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: retq
%c = icmp sgt <8 x i16> %a0, %a1
; AVX512-LABEL: test_v16i16_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vmovd %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
-; AVX512-NEXT: movsbl %al, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX512-LABEL: test_v16i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %xmm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
%c = icmp sgt <16 x i8> %a0, %a1
; AVX512-LABEL: test_v32i8_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: sbbl %eax, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq