;
; AVX512F-32-LABEL: test_mask_cmp_b_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: pushl %ebx
+; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .Lcfi5:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
-; AVX512F-32-NEXT: pushl %esi
+; AVX512F-32-NEXT: pushl %ebx
; AVX512F-32-NEXT: .Lcfi6:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
-; AVX512F-32-NEXT: subl $60, %esp
+; AVX512F-32-NEXT: pushl %edi
; AVX512F-32-NEXT: .Lcfi7:
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: .Lcfi8:
-; AVX512F-32-NEXT: .cfi_offset %esi, -12
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 20
+; AVX512F-32-NEXT: subl $60, %esp
; AVX512F-32-NEXT: .Lcfi9:
-; AVX512F-32-NEXT: .cfi_offset %ebx, -8
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT: .Lcfi10:
+; AVX512F-32-NEXT: .cfi_offset %esi, -20
+; AVX512F-32-NEXT: .Lcfi11:
+; AVX512F-32-NEXT: .cfi_offset %edi, -16
+; AVX512F-32-NEXT: .Lcfi12:
+; AVX512F-32-NEXT: .cfi_offset %ebx, -12
+; AVX512F-32-NEXT: .Lcfi13:
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: movl %ecx, %eax
; AVX512F-32-NEXT: shrb $5, %al
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT: shrl $12, %eax
+; AVX512F-32-NEXT: movl $1036, %edi # imm = 0x40C
+; AVX512F-32-NEXT: bextrl %edi, %ecx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ecx, %ebp
; AVX512F-32-NEXT: shrl $13, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT: shrl $14, %eax
+; AVX512F-32-NEXT: movl $526, %edx # imm = 0x20E
+; AVX512F-32-NEXT: bextrl %edx, %ebp, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
+; AVX512F-32-NEXT: movl $271, %esi # imm = 0x10F
+; AVX512F-32-NEXT: bextrl %esi, %ebp, %eax
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT: shrl $15, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $16, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: shrl $16, %ecx
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %ebx
+; AVX512F-32-NEXT: movl %ecx, %ebx
; AVX512F-32-NEXT: andb $15, %bl
-; AVX512F-32-NEXT: movl %ebx, %edx
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $2, %bl
; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: shrb $3, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $4, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $4, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $5, %dl
-; AVX512F-32-NEXT: andb $1, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $5, %al
+; AVX512F-32-NEXT: andb $1, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $6, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $6, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
-; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> %ECX<def>
+; AVX512F-32-NEXT: shrb $7, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $24, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %ebp, %ebx
+; AVX512F-32-NEXT: shrl $24, %ebx
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm3
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4
; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm3, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6]
; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm4
; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm4, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
+; AVX512F-32-NEXT: andb $15, %bl
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrb $2, %bl
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4
; AVX512F-32-NEXT: vpbroadcastw %xmm4, %xmm4
; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm5
; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm7, %ymm5, %ymm5
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0
+; AVX512F-32-NEXT: movl %ebp, %ecx
; AVX512F-32-NEXT: movl %ecx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0
; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: movl %ecx, %esi
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm2, %ymm7, %ymm7
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
+; AVX512F-32-NEXT: movl %ebp, %eax
; AVX512F-32-NEXT: shrl $30, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
+; AVX512F-32-NEXT: movl %ebp, %eax
; AVX512F-32-NEXT: shrl $31, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: andb $2, %al
; AVX512F-32-NEXT: shrb %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $4, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $5, %al
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $6, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $7, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %al
+; AVX512F-32-NEXT: movb %bh, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %dl
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movb %bh, %cl
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT: shrl $12, %eax
+; AVX512F-32-NEXT: bextrl %edi, %ebx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $13, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT: shrl $14, %eax
+; AVX512F-32-NEXT: bextrl %edx, %ebx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; AVX512F-32-NEXT: bextrl %esi, %ebx, %eax
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT: shrl $15, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: shrl $16, %ebx
-; AVX512F-32-NEXT: kmovd %ebx, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrl $16, %eax
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: andb $15, %al
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $2, %al
-; AVX512F-32-NEXT: kmovd %eax, %k0
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %edx
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $4, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $6, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
-; AVX512F-32-NEXT: shrb $7, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
+; AVX512F-32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
+; AVX512F-32-NEXT: shrb $7, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $24, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k0
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $30, %eax
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm4, %ymm3
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $31, %eax
; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0
; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl %esi, %eax
-; AVX512F-32-NEXT: adcxl %ecx, %edx
+; AVX512F-32-NEXT: addl %ebp, %eax
+; AVX512F-32-NEXT: adcxl %ebx, %edx
; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: popl %esi
+; AVX512F-32-NEXT: popl %edi
; AVX512F-32-NEXT: popl %ebx
+; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
%res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
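; Note on the bextrl lines in the checks above: the mov-immediate/bextrl pairs
; replace the earlier and+shr sequences used to pull sub-fields out of the mask.
; A minimal C sketch of the extraction they perform, assuming the standard BEXTR
; control encoding (control = (length << 8) | start); the helper name bextr32 is
; only for illustration and is not part of the test:
;
;   /* control 0x40C -> 4 bits from bit 12, i.e. (src & 0xF000) >> 12
;      control 0x20E -> 2 bits from bit 14, i.e. (src & 0xC000) >> 14
;      control 0x10F -> 1 bit  from bit 15, i.e. (src & 0x8000) >> 15 */
;   unsigned bextr32(unsigned src, unsigned ctrl) {
;     unsigned start = ctrl & 0xff;
;     unsigned len   = (ctrl >> 8) & 0xff;        /* assumes len < 32 here */
;     return (src >> start) & ((1u << len) - 1u);
;   }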
; AVX512F-32-LABEL: test_ucmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $60, %esp
-; AVX512F-32-NEXT: .Lcfi10:
+; AVX512F-32-NEXT: .Lcfi14:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 64
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
;
; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: pushl %ebx
-; AVX512F-32-NEXT: .Lcfi11:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .Lcfi15:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
-; AVX512F-32-NEXT: pushl %esi
-; AVX512F-32-NEXT: .Lcfi12:
+; AVX512F-32-NEXT: pushl %ebx
+; AVX512F-32-NEXT: .Lcfi16:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
+; AVX512F-32-NEXT: pushl %edi
+; AVX512F-32-NEXT: .Lcfi17:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: pushl %esi
+; AVX512F-32-NEXT: .Lcfi18:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 20
; AVX512F-32-NEXT: subl $60, %esp
-; AVX512F-32-NEXT: .Lcfi13:
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
-; AVX512F-32-NEXT: .Lcfi14:
-; AVX512F-32-NEXT: .cfi_offset %esi, -12
-; AVX512F-32-NEXT: .Lcfi15:
-; AVX512F-32-NEXT: .cfi_offset %ebx, -8
+; AVX512F-32-NEXT: .Lcfi19:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT: .Lcfi20:
+; AVX512F-32-NEXT: .cfi_offset %esi, -20
+; AVX512F-32-NEXT: .Lcfi21:
+; AVX512F-32-NEXT: .cfi_offset %edi, -16
+; AVX512F-32-NEXT: .Lcfi22:
+; AVX512F-32-NEXT: .cfi_offset %ebx, -12
+; AVX512F-32-NEXT: .Lcfi23:
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: movl %ecx, %eax
; AVX512F-32-NEXT: shrb $5, %al
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT: shrl $12, %eax
+; AVX512F-32-NEXT: movl $1036, %edi # imm = 0x40C
+; AVX512F-32-NEXT: bextrl %edi, %ecx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ecx, %ebp
; AVX512F-32-NEXT: shrl $13, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT: shrl $14, %eax
+; AVX512F-32-NEXT: movl $526, %edx # imm = 0x20E
+; AVX512F-32-NEXT: bextrl %edx, %ebp, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
+; AVX512F-32-NEXT: movl $271, %esi # imm = 0x10F
+; AVX512F-32-NEXT: bextrl %esi, %ebp, %eax
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT: shrl $15, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $16, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: shrl $16, %ecx
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %ebx
+; AVX512F-32-NEXT: movl %ecx, %ebx
; AVX512F-32-NEXT: andb $15, %bl
-; AVX512F-32-NEXT: movl %ebx, %edx
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $2, %bl
; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: shrb $3, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $4, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $4, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $5, %dl
-; AVX512F-32-NEXT: andb $1, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $5, %al
+; AVX512F-32-NEXT: andb $1, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $6, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $6, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
-; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> %ECX<def>
+; AVX512F-32-NEXT: shrb $7, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $24, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %ebp, %ebx
+; AVX512F-32-NEXT: shrl $24, %ebx
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm3
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4
; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm3, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6]
; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm4
; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm4, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
+; AVX512F-32-NEXT: andb $15, %bl
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrb $2, %bl
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4
; AVX512F-32-NEXT: vpbroadcastw %xmm4, %xmm4
; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm5
; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm7, %ymm5, %ymm5
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0
+; AVX512F-32-NEXT: movl %ebp, %ecx
; AVX512F-32-NEXT: movl %ecx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0
; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: movl %ecx, %esi
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm2, %ymm7, %ymm7
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
+; AVX512F-32-NEXT: movl %ebp, %eax
; AVX512F-32-NEXT: shrl $30, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
+; AVX512F-32-NEXT: movl %ebp, %eax
; AVX512F-32-NEXT: shrl $31, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: andb $2, %al
; AVX512F-32-NEXT: shrb %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $4, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $5, %al
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $6, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $7, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %al
+; AVX512F-32-NEXT: movb %bh, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %dl
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movb %bh, %cl
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT: shrl $12, %eax
+; AVX512F-32-NEXT: bextrl %edi, %ebx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $13, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT: shrl $14, %eax
+; AVX512F-32-NEXT: bextrl %edx, %ebx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; AVX512F-32-NEXT: bextrl %esi, %ebx, %eax
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT: shrl $15, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: shrl $16, %ebx
-; AVX512F-32-NEXT: kmovd %ebx, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrl $16, %eax
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: andb $15, %al
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $2, %al
-; AVX512F-32-NEXT: kmovd %eax, %k0
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %edx
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $4, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $6, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
-; AVX512F-32-NEXT: shrb $7, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
+; AVX512F-32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
+; AVX512F-32-NEXT: shrb $7, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $24, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k0
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $30, %eax
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm4, %ymm3
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $31, %eax
; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0
; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl %esi, %eax
-; AVX512F-32-NEXT: adcxl %ecx, %edx
+; AVX512F-32-NEXT: addl %ebp, %eax
+; AVX512F-32-NEXT: adcxl %ebx, %edx
; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: popl %esi
+; AVX512F-32-NEXT: popl %edi
; AVX512F-32-NEXT: popl %ebx
+; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
%res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)