; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k2
-; NoVLX-NEXT: kshiftrw $15, %k2, %k2
-; NoVLX-NEXT: kshiftlw $15, %k0, %k3
-; NoVLX-NEXT: kshiftrw $15, %k3, %k3
-; NoVLX-NEXT: kshiftlw $14, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kmovw %k3, %ecx
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k0, %k3
+; NoVLX-NEXT: kmovw %k3, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $1, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $4, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $5, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $6, %k0, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0