;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
+; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
+; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
-; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm4
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
+; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm4, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
+; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpmaxuw %ymm2, %ymm0, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpmaxuw %ymm3, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm0
+; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm2
; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm4
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpmaxuw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
+; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm4, %ymm1
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
-; NoVLX-NEXT: vpmaxuw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0