; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <16 x i32> %a, zeroinitializer
; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k1, %ymm0
+; GENERIC-NEXT: vpmovm2d %k1, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm1
+; GENERIC-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k1, %ymm0
+; SKX-NEXT: vpmovm2d %k1, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm1
+; SKX-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.25]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <16 x double> %a, zeroinitializer
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm0
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm0
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <8 x float> %a, zeroinitializer
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <4 x float> %a, zeroinitializer
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <4 x double> %a, zeroinitializer
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <2 x float> %a, zeroinitializer
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2q %k0, %xmm0
+; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
; GENERIC-LABEL: zext_8x8mem_to_8x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_8x8mem_to_8x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_16x8mem_to_16x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1
+; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_16x8mem_to_16x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1
+; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_16x8_to_16x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1
+; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_16x8_to_16x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1
+; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_32x8mem_to_32x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm0, %k1
+; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_32x8mem_to_32x16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm0, %k1
+; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_32x8_to_32x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm1, %k1
+; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_32x8_to_32x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm1, %k1
+; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_8x8mem_to_8x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_8x8mem_to_8x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_16x8mem_to_16x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1
+; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_16x8mem_to_16x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1
+; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_16x8_to_16x32_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1
+; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_16x8_to_16x32_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1
+; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_8x8mem_to_8x64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_8x8mem_to_8x64mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_8x16mem_to_8x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_8x16mem_to_8x32mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_8x16_to_8x32mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm1, %k1
+; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_16x16mem_to_16x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1
+; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_16x16mem_to_16x32mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1
+; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_16x16_to_16x32mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1
+; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_8x16mem_to_8x64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_8x16mem_to_8x64mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_8x16_to_8x64mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm1, %k1
+; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_8x32mem_to_8x64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: sext_8x32mem_to_8x64mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1
+; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: zext_8x32_to_8x64mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm1, %k1
+; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: trunc_16i8_to_16i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0
+; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovd %k0, %eax
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: trunc_4i32_to_4i1:
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%mask_a = trunc <4 x i32>%a to <4 x i1>
%mask_b = trunc <4 x i32>%b to <4 x i1>
; GENERIC-LABEL: trunc_8i16_to_8i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0
+; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovd %k0, %eax
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: sext_8i1_8i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm0
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8i1_8i32:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%x = icmp slt <8 x i32> %a1, %a2
%x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; GENERIC-LABEL: sext_8i1_8i16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2w %k0, %xmm0
+; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8i1_8i16:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%x = icmp slt <8 x i32> %a1, %a2
; GENERIC-LABEL: sext_16i1_16i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16i1_16i32:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%x = icmp slt <16 x i32> %a1, %a2
%y = sext <16 x i1> %x to <16 x i32>
; GENERIC-LABEL: sext_8i1_8i64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2q %k0, %zmm0
+; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8i1_8i64:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %zmm0
+; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%x = icmp slt <8 x i32> %a1, %a2
%y = sext <8 x i1> %x to <8 x i64>
; GENERIC-LABEL: test21:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovb2m %zmm2, %k1
+; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; GENERIC-NEXT: kshiftrq $32, %k1, %k1
; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z}
; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1
+; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: test_x86_fmadd132_ps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm2, %k1
+; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [9:1.00]
; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: test_x86_fmadd231_ps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm2, %k1
+; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [9:1.00]
; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-LABEL: test_x86_fmadd213_ps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm2, %k1
+; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [7:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: kandnw %k0, %k1, %k0
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
; SKX-NEXT: kandnw %k0, %k1, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%x_gt_y = icmp sgt <4 x i64> %x, %y
; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: kandnw %k1, %k0, %k0
-; GENERIC-NEXT: vpmovm2q %k0, %xmm0
+; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test5:
; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00]
; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00]
; SKX-NEXT: kandnw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%x_gt_y = icmp slt <2 x i64> %x, %y
%x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
; GENERIC-LABEL: vcmp_test7:
; GENERIC: # %bb.0: # %allocas
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0
+; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: movb $85, %al # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1
; GENERIC-NEXT: korb %k1, %k0, %k0
; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.2:
; GENERIC-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2b %k0, %xmm0
+; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-NEXT: .LBB386_1:
; GENERIC-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2b %k0, %xmm0
+; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50]
; SKX-NEXT: # %bb.2:
; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
; SKX-NEXT: .LBB386_1:
; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%cond = icmp sgt i32 %a1, %b1
; GENERIC-NEXT: .LBB387_1:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: .LBB387_3:
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0
-; GENERIC-NEXT: vpmovm2b %k0, %xmm0
+; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpmov_test9:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: .LBB387_3:
; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp sgt i32 %a1, %b1
%c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: .LBB389_3:
; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test11:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: .LBB389_3:
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp sgt i32 %a1, %b1
%c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
; GENERIC-NEXT: movw $1, %cx # sched: [1:0.33]
; GENERIC-NEXT: cmovgw %ax, %cx # sched: [2:0.67]
; GENERIC-NEXT: kmovd %ecx, %k0
-; GENERIC-NEXT: vpmovm2b %k0, %xmm0
+; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test15:
; SKX-NEXT: movw $1, %cx # sched: [1:0.25]
; SKX-NEXT: cmovgw %ax, %cx # sched: [1:0.50]
; SKX-NEXT: kmovd %ecx, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i16 21845 to <16 x i1>
%b = bitcast i16 1 to <16 x i1>
; GENERIC-NEXT: kmovq %rdi, %k0
; GENERIC-NEXT: movb $1, %al # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1
-; GENERIC-NEXT: vpmovm2b %k1, %zmm0
+; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2b %k0, %zmm1
+; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %zmm0, %k0
-; GENERIC-NEXT: vpmovm2b %k0, %zmm0
+; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test16:
; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00]
; SKX-NEXT: movb $1, %al # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k1, %zmm0
+; SKX-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.25]
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovm2b %k0, %zmm1
+; SKX-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.25]
; SKX-NEXT: movl $32, %eax # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33]
; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00]
; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %zmm0
+; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i64 %x to <64 x i1>
%b = insertelement <64 x i1>%a, i1 true, i32 5
; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33]
; GENERIC-NEXT: setg %al # sched: [1:0.50]
; GENERIC-NEXT: kmovd %eax, %k1
-; GENERIC-NEXT: vpmovm2b %k1, %zmm0
+; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2b %k0, %zmm1
+; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %zmm0, %k0
-; GENERIC-NEXT: vpmovm2b %k0, %zmm0
+; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test17:
; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25]
; SKX-NEXT: setg %al # sched: [1:0.50]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k1, %zmm0
+; SKX-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.25]
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovm2b %k0, %zmm1
+; SKX-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.25]
; SKX-NEXT: movl $32, %eax # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33]
; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00]
; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %zmm0
+; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i64 %x to <64 x i1>
%b = icmp sgt i32 %y, %z
; GENERIC-NEXT: kshiftrw $15, %k0, %k0
; GENERIC-NEXT: kshiftlw $6, %k2, %k2
; GENERIC-NEXT: kshiftrw $15, %k2, %k2
-; GENERIC-NEXT: vpmovm2q %k1, %zmm0
-; GENERIC-NEXT: vpmovm2q %k2, %zmm1
+; GENERIC-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [4:0.50]
; GENERIC-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %zmm2, %k1
+; GENERIC-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: kshiftlb $1, %k1, %k1
; GENERIC-NEXT: kshiftrb $1, %k1, %k1
; GENERIC-NEXT: kshiftlb $7, %k0, %k0
; GENERIC-NEXT: korb %k0, %k1, %k0
-; GENERIC-NEXT: vpmovm2w %k0, %xmm0
+; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: kshiftlw $6, %k2, %k2 # sched: [3:1.00]
; SKX-NEXT: kshiftrw $15, %k2, %k2 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k1, %zmm0
-; SKX-NEXT: vpmovm2q %k2, %zmm1
+; SKX-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.25]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [8:0.50]
; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [3:1.00]
; SKX-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:1.00]
; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00]
; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%b = bitcast i8 %a to <8 x i1>
; GENERIC-LABEL: vmov_test21:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm1, %k1
+; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: store_v8i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0
+; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: knotb %k0, %k0
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: store_v16i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0
+; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: knotw %k0, %k0
; GENERIC-NEXT: kmovw %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: load_8i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovb (%rdi), %k0
-; GENERIC-NEXT: vpmovm2q %k0, %zmm0
+; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: load_8i1:
; SKX: # %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2q %k0, %zmm0
+; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%b = load <8 x i1>, <8 x i1>* %a
%c = sext <8 x i1> %b to <8 x i64>
; GENERIC-LABEL: load_16i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw (%rdi), %k0
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: load_16i1:
; SKX: # %bb.0:
; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%b = load <16 x i1>, <16 x i1>* %a
%c = sext <16 x i1> %b to <16 x i32>
; GENERIC-LABEL: load_2i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovb (%rdi), %k0
-; GENERIC-NEXT: vpmovm2q %k0, %xmm0
+; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: load_2i1:
; SKX: # %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%b = load <2 x i1>, <2 x i1>* %a
%c = sext <2 x i1> %b to <2 x i16>
; GENERIC-LABEL: load_4i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovb (%rdi), %k0
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: load_4i1:
; SKX: # %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%b = load <4 x i1>, <4 x i1>* %a
%c = sext <4 x i1> %b to <4 x i16>
; GENERIC-LABEL: load_32i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovd (%rdi), %k0
-; GENERIC-NEXT: vpmovm2w %k0, %zmm0
+; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: load_32i1:
; SKX: # %bb.0:
; SKX-NEXT: kmovd (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2w %k0, %zmm0
+; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%b = load <32 x i1>, <32 x i1>* %a
%c = sext <32 x i1> %b to <32 x i16>
; GENERIC-LABEL: load_64i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovq (%rdi), %k0
-; GENERIC-NEXT: vpmovm2b %k0, %zmm0
+; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: load_64i1:
; SKX: # %bb.0:
; SKX-NEXT: kmovq (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2b %k0, %zmm0
+; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%b = load <64 x i1>, <64 x i1>* %a
%c = sext <64 x i1> %b to <64 x i8>
; GENERIC-LABEL: store_8i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0
+; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: store_8i1_1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0
+; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: store_16i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0
+; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovw %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; GENERIC-LABEL: store_32i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm0, %k0
+; GENERIC-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovd %k0, (%rdi)
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: store_32i1_1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovw2m %zmm0, %k0
+; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovd %k0, (%rdi)
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-LABEL: store_64i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovb2m %zmm0, %k0
+; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: kmovq %k0, (%rdi)
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: knotw %k0, %k1
; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
; SKX: # %bb.0: # %entry
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: knotw %k0, %k1 # sched: [1:1.00]
; SKX-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq # sched: [7:1.00]