; SSE42-NEXT: popq %rbx
; SSE42-NEXT: retq
;
-; AVX1-LABEL: _clearupper32xi8b:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: movq %rax, %rdx
-; AVX1-NEXT: movq %rax, %rsi
-; AVX1-NEXT: movq %rax, %rdi
-; AVX1-NEXT: shrq $32, %rdi
-; AVX1-NEXT: andl $15, %edi
-; AVX1-NEXT: shlq $32, %rdi
-; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX1-NEXT: orq %rdi, %rax
-; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
-; AVX1-NEXT: shrq $40, %rsi
-; AVX1-NEXT: andl $15, %esi
-; AVX1-NEXT: shlq $40, %rsi
-; AVX1-NEXT: orq %rax, %rsi
-; AVX1-NEXT: movq %rdi, %rax
-; AVX1-NEXT: shrq $48, %rdx
-; AVX1-NEXT: andl $15, %edx
-; AVX1-NEXT: shlq $48, %rdx
-; AVX1-NEXT: orq %rsi, %rdx
-; AVX1-NEXT: movq %rdi, %rsi
-; AVX1-NEXT: shrq $56, %rcx
-; AVX1-NEXT: andl $15, %ecx
-; AVX1-NEXT: shlq $56, %rcx
-; AVX1-NEXT: orq %rdx, %rcx
-; AVX1-NEXT: movq %rdi, %rdx
-; AVX1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movq %rdi, %rcx
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: andl $15, %ecx
-; AVX1-NEXT: shlq $32, %rcx
-; AVX1-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; AVX1-NEXT: orq %rcx, %rdi
-; AVX1-NEXT: shrq $40, %rdx
-; AVX1-NEXT: andl $15, %edx
-; AVX1-NEXT: shlq $40, %rdx
-; AVX1-NEXT: orq %rdi, %rdx
-; AVX1-NEXT: shrq $48, %rsi
-; AVX1-NEXT: andl $15, %esi
-; AVX1-NEXT: shlq $48, %rsi
-; AVX1-NEXT: orq %rdx, %rsi
-; AVX1-NEXT: shrq $56, %rax
-; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: shlq $56, %rax
-; AVX1-NEXT: orq %rsi, %rax
-; AVX1-NEXT: vmovq %xmm0, %rcx
-; AVX1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: movl %ecx, %eax
-; AVX1-NEXT: shrl $8, %eax
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX1-NEXT: movl %ecx, %eax
-; AVX1-NEXT: shrl $16, %eax
-; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX1-NEXT: movl %ecx, %eax
-; AVX1-NEXT: shrl $24, %eax
-; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX1-NEXT: movq %rcx, %rax
-; AVX1-NEXT: shrq $32, %rax
-; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX1-NEXT: movq %rcx, %rax
-; AVX1-NEXT: shrq $40, %rax
-; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX1-NEXT: movq %rcx, %rax
-; AVX1-NEXT: shrq $48, %rax
-; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: shrq $56, %rcx
-; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $16, %ecx
-; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $24, %ecx
-; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $32, %rcx
-; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $40, %rcx
-; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq $48, %rcx
-; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX1-NEXT: shrq $56, %rax
-; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX1-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper32xi8b:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: movq %rax, %rdx
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: movq %rax, %rdi
-; AVX2-NEXT: shrq $32, %rdi
-; AVX2-NEXT: andl $15, %edi
-; AVX2-NEXT: shlq $32, %rdi
-; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; AVX2-NEXT: orq %rdi, %rax
-; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
-; AVX2-NEXT: shrq $40, %rsi
-; AVX2-NEXT: andl $15, %esi
-; AVX2-NEXT: shlq $40, %rsi
-; AVX2-NEXT: orq %rax, %rsi
-; AVX2-NEXT: movq %rdi, %rax
-; AVX2-NEXT: shrq $48, %rdx
-; AVX2-NEXT: andl $15, %edx
-; AVX2-NEXT: shlq $48, %rdx
-; AVX2-NEXT: orq %rsi, %rdx
-; AVX2-NEXT: movq %rdi, %rsi
-; AVX2-NEXT: shrq $56, %rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: shlq $56, %rcx
-; AVX2-NEXT: orq %rdx, %rcx
-; AVX2-NEXT: movq %rdi, %rdx
-; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movq %rdi, %rcx
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: shlq $32, %rcx
-; AVX2-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; AVX2-NEXT: orq %rcx, %rdi
-; AVX2-NEXT: shrq $40, %rdx
-; AVX2-NEXT: andl $15, %edx
-; AVX2-NEXT: shlq $40, %rdx
-; AVX2-NEXT: orq %rdi, %rdx
-; AVX2-NEXT: shrq $48, %rsi
-; AVX2-NEXT: andl $15, %esi
-; AVX2-NEXT: shlq $48, %rsi
-; AVX2-NEXT: orq %rdx, %rsi
-; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: shlq $56, %rax
-; AVX2-NEXT: orq %rsi, %rax
-; AVX2-NEXT: vmovq %xmm0, %rcx
-; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: vmovd %ecx, %xmm1
-; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $24, %eax
-; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX2-NEXT: movq %rcx, %rax
-; AVX2-NEXT: shrq $32, %rax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX2-NEXT: movq %rcx, %rax
-; AVX2-NEXT: shrq $40, %rax
-; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX2-NEXT: movq %rcx, %rax
-; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: shrq $56, %rcx
-; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $8, %ecx
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $16, %ecx
-; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $24, %ecx
-; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $32, %rcx
-; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $40, %rcx
-; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rcx
-; AVX2-NEXT: shrq $48, %rcx
-; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm1
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper32xi8b:
+; AVX: # %bb.0:
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
+; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
+; AVX-NEXT: movq %rdx, %rax
+; AVX-NEXT: shrq $56, %rax
+; AVX-NEXT: andl $15, %eax
+; AVX-NEXT: movq %rdx, %rcx
+; AVX-NEXT: shrq $48, %rcx
+; AVX-NEXT: andl $15, %ecx
+; AVX-NEXT: movq %rdx, %rsi
+; AVX-NEXT: shrq $40, %rsi
+; AVX-NEXT: andl $15, %esi
+; AVX-NEXT: movq %rdx, %r8
+; AVX-NEXT: shrq $32, %r8
+; AVX-NEXT: andl $15, %r8d
+; AVX-NEXT: movq %rdi, %r9
+; AVX-NEXT: shrq $56, %r9
+; AVX-NEXT: andl $15, %r9d
+; AVX-NEXT: movq %rdi, %r10
+; AVX-NEXT: shrq $48, %r10
+; AVX-NEXT: andl $15, %r10d
+; AVX-NEXT: movq %rdi, %r11
+; AVX-NEXT: shrq $40, %r11
+; AVX-NEXT: andl $15, %r11d
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: shrq $32, %rbx
+; AVX-NEXT: andl $15, %ebx
+; AVX-NEXT: shlq $32, %rbx
+; AVX-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
+; AVX-NEXT: orq %rbx, %rdi
+; AVX-NEXT: shlq $40, %r11
+; AVX-NEXT: orq %rdi, %r11
+; AVX-NEXT: shlq $48, %r10
+; AVX-NEXT: orq %r11, %r10
+; AVX-NEXT: shlq $56, %r9
+; AVX-NEXT: orq %r10, %r9
+; AVX-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: shlq $32, %r8
+; AVX-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; AVX-NEXT: orq %r8, %rdx
+; AVX-NEXT: shlq $40, %rsi
+; AVX-NEXT: orq %rdx, %rsi
+; AVX-NEXT: shlq $48, %rcx
+; AVX-NEXT: orq %rsi, %rcx
+; AVX-NEXT: shlq $56, %rax
+; AVX-NEXT: orq %rcx, %rax
+; AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: vinsertf128 $0, -{{[0-9]+}}(%rsp), %ymm0, %ymm0
+; AVX-NEXT: popq %rbx
+; AVX-NEXT: retq
%x4 = bitcast <32 x i8> %0 to <64 x i4>
%r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1
%r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: movq %rax, %rdi
-; AVX-NEXT: movq %rax, %r8
-; AVX-NEXT: movq %rax, %r9
-; AVX-NEXT: movl %eax, %r10d
-; AVX-NEXT: movl %eax, %r11d
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: shrl $8, %eax
-; AVX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX-NEXT: shrl $16, %r11d
-; AVX-NEXT: vpinsrb $2, %r11d, %xmm1, %xmm1
-; AVX-NEXT: shrl $24, %r10d
-; AVX-NEXT: vpinsrb $3, %r10d, %xmm1, %xmm1
-; AVX-NEXT: shrq $32, %r9
-; AVX-NEXT: vpinsrb $4, %r9d, %xmm1, %xmm1
-; AVX-NEXT: shrq $40, %r8
-; AVX-NEXT: vpinsrb $5, %r8d, %xmm1, %xmm1
-; AVX-NEXT: shrq $48, %rdi
-; AVX-NEXT: vpinsrb $6, %edi, %xmm1, %xmm1
-; AVX-NEXT: vpextrq $1, %xmm0, %rax
-; AVX-NEXT: shrq $56, %rsi
-; AVX-NEXT: vpinsrb $7, %esi, %xmm1, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $8, %esi
-; AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $16, %esi
-; AVX-NEXT: vpinsrb $10, %esi, %xmm0, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $24, %esi
-; AVX-NEXT: vpinsrb $11, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $32, %rsi
-; AVX-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $40, %rsi
-; AVX-NEXT: vpinsrb $13, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $48, %rsi
-; AVX-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
-; AVX-NEXT: shrq $56, %rax
-; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT: vmovaps 32(%rdx), %ymm1
; AVX-NEXT: vpaddb (%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovaps 16(%rdx), %xmm2
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: movq %rax, %rdi
-; AVX2-NEXT: movq %rax, %r8
-; AVX2-NEXT: movq %rax, %r9
-; AVX2-NEXT: movl %eax, %r10d
-; AVX2-NEXT: movl %eax, %r11d
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX2-NEXT: shrl $16, %r11d
-; AVX2-NEXT: vpinsrb $2, %r11d, %xmm1, %xmm1
-; AVX2-NEXT: shrl $24, %r10d
-; AVX2-NEXT: vpinsrb $3, %r10d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $32, %r9
-; AVX2-NEXT: vpinsrb $4, %r9d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $40, %r8
-; AVX2-NEXT: vpinsrb $5, %r8d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $48, %rdi
-; AVX2-NEXT: vpinsrb $6, %edi, %xmm1, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: shrq $56, %rsi
-; AVX2-NEXT: vpinsrb $7, %esi, %xmm1, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $8, %esi
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $16, %esi
-; AVX2-NEXT: vpinsrb $10, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $24, %esi
-; AVX2-NEXT: vpinsrb $11, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $32, %rsi
-; AVX2-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $40, %rsi
-; AVX2-NEXT: vpinsrb $13, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $48, %rsi
-; AVX2-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
-; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: vmovaps 32(%rdx), %ymm1
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX2-NEXT: vmovaps %ymm1, 32(%rcx)
;
; AVX512F-LABEL: vec384_v3i128_to_v1i384_factor3:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512F-NEXT: vmovq %xmm0, %rax
-; AVX512F-NEXT: movq %rax, %rsi
-; AVX512F-NEXT: movq %rax, %rdi
-; AVX512F-NEXT: movq %rax, %r8
-; AVX512F-NEXT: movq %rax, %r9
-; AVX512F-NEXT: movl %eax, %r10d
-; AVX512F-NEXT: movl %eax, %r11d
-; AVX512F-NEXT: vmovd %eax, %xmm1
-; AVX512F-NEXT: shrl $8, %eax
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: shrl $16, %r11d
-; AVX512F-NEXT: vpinsrb $2, %r11d, %xmm1, %xmm1
-; AVX512F-NEXT: shrl $24, %r10d
-; AVX512F-NEXT: vpinsrb $3, %r10d, %xmm1, %xmm1
-; AVX512F-NEXT: shrq $32, %r9
-; AVX512F-NEXT: vpinsrb $4, %r9d, %xmm1, %xmm1
-; AVX512F-NEXT: shrq $40, %r8
-; AVX512F-NEXT: vpinsrb $5, %r8d, %xmm1, %xmm1
-; AVX512F-NEXT: shrq $48, %rdi
-; AVX512F-NEXT: vpinsrb $6, %edi, %xmm1, %xmm1
-; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512F-NEXT: shrq $56, %rsi
-; AVX512F-NEXT: vpinsrb $7, %esi, %xmm1, %xmm0
-; AVX512F-NEXT: movl %eax, %esi
-; AVX512F-NEXT: shrl $8, %esi
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX512F-NEXT: movl %eax, %esi
-; AVX512F-NEXT: shrl $16, %esi
-; AVX512F-NEXT: vpinsrb $10, %esi, %xmm0, %xmm0
-; AVX512F-NEXT: movl %eax, %esi
-; AVX512F-NEXT: shrl $24, %esi
-; AVX512F-NEXT: vpinsrb $11, %esi, %xmm0, %xmm0
-; AVX512F-NEXT: movq %rax, %rsi
-; AVX512F-NEXT: shrq $32, %rsi
-; AVX512F-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
-; AVX512F-NEXT: movq %rax, %rsi
-; AVX512F-NEXT: shrq $40, %rsi
-; AVX512F-NEXT: vpinsrb $13, %esi, %xmm0, %xmm0
-; AVX512F-NEXT: movq %rax, %rsi
-; AVX512F-NEXT: shrq $48, %rsi
-; AVX512F-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
-; AVX512F-NEXT: shrq $56, %rax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovaps 32(%rdx), %ymm1
; AVX512F-NEXT: vmovaps %ymm1, 32(%rcx)
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
; AVX512BW-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: movq %rax, %rsi
-; AVX512BW-NEXT: movq %rax, %rdi
-; AVX512BW-NEXT: movq %rax, %r8
-; AVX512BW-NEXT: movq %rax, %r9
-; AVX512BW-NEXT: movl %eax, %r10d
-; AVX512BW-NEXT: movl %eax, %r11d
-; AVX512BW-NEXT: vmovd %eax, %xmm1
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512BW-NEXT: shrl $16, %r11d
-; AVX512BW-NEXT: vpinsrb $2, %r11d, %xmm1, %xmm1
-; AVX512BW-NEXT: shrl $24, %r10d
-; AVX512BW-NEXT: vpinsrb $3, %r10d, %xmm1, %xmm1
-; AVX512BW-NEXT: shrq $32, %r9
-; AVX512BW-NEXT: vpinsrb $4, %r9d, %xmm1, %xmm1
-; AVX512BW-NEXT: shrq $40, %r8
-; AVX512BW-NEXT: vpinsrb $5, %r8d, %xmm1, %xmm1
-; AVX512BW-NEXT: shrq $48, %rdi
-; AVX512BW-NEXT: vpinsrb $6, %edi, %xmm1, %xmm1
-; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512BW-NEXT: shrq $56, %rsi
-; AVX512BW-NEXT: vpinsrb $7, %esi, %xmm1, %xmm0
-; AVX512BW-NEXT: movl %eax, %esi
-; AVX512BW-NEXT: shrl $8, %esi
-; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512BW-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX512BW-NEXT: movl %eax, %esi
-; AVX512BW-NEXT: shrl $16, %esi
-; AVX512BW-NEXT: vpinsrb $10, %esi, %xmm0, %xmm0
-; AVX512BW-NEXT: movl %eax, %esi
-; AVX512BW-NEXT: shrl $24, %esi
-; AVX512BW-NEXT: vpinsrb $11, %esi, %xmm0, %xmm0
-; AVX512BW-NEXT: movq %rax, %rsi
-; AVX512BW-NEXT: shrq $32, %rsi
-; AVX512BW-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
-; AVX512BW-NEXT: movq %rax, %rsi
-; AVX512BW-NEXT: shrq $40, %rsi
-; AVX512BW-NEXT: vpinsrb $13, %esi, %xmm0, %xmm0
-; AVX512BW-NEXT: movq %rax, %rsi
-; AVX512BW-NEXT: shrq $48, %rsi
-; AVX512BW-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
-; AVX512BW-NEXT: shrq $56, %rax
-; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
; AVX512BW-NEXT: vzeroupper
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vmovdqa 16(%rdi), %xmm1
-; AVX-NEXT: vpaddb 16(%rsi), %xmm1, %xmm1
-; AVX-NEXT: vmovq %xmm1, %rax
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: movq %rax, %rdi
-; AVX-NEXT: movq %rax, %r8
-; AVX-NEXT: movq %rax, %r9
-; AVX-NEXT: movl %eax, %r10d
-; AVX-NEXT: movl %eax, %r11d
-; AVX-NEXT: vmovd %eax, %xmm2
-; AVX-NEXT: shrl $8, %eax
-; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX-NEXT: shrl $16, %r11d
-; AVX-NEXT: vpinsrb $2, %r11d, %xmm2, %xmm2
-; AVX-NEXT: shrl $24, %r10d
-; AVX-NEXT: vpinsrb $3, %r10d, %xmm2, %xmm2
-; AVX-NEXT: shrq $32, %r9
-; AVX-NEXT: vpinsrb $4, %r9d, %xmm2, %xmm2
-; AVX-NEXT: shrq $40, %r8
-; AVX-NEXT: vpinsrb $5, %r8d, %xmm2, %xmm2
-; AVX-NEXT: shrq $48, %rdi
-; AVX-NEXT: vpinsrb $6, %edi, %xmm2, %xmm2
-; AVX-NEXT: vpextrq $1, %xmm1, %rax
-; AVX-NEXT: shrq $56, %rsi
-; AVX-NEXT: vpinsrb $7, %esi, %xmm2, %xmm1
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $8, %esi
-; AVX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $16, %esi
-; AVX-NEXT: vpinsrb $10, %esi, %xmm1, %xmm1
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $24, %esi
-; AVX-NEXT: vpinsrb $11, %esi, %xmm1, %xmm1
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $32, %rsi
-; AVX-NEXT: vpinsrb $12, %esi, %xmm1, %xmm1
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $40, %rsi
-; AVX-NEXT: vpinsrb $13, %esi, %xmm1, %xmm1
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $48, %rsi
-; AVX-NEXT: vpinsrb $14, %esi, %xmm1, %xmm1
-; AVX-NEXT: vmovq %xmm0, %rsi
-; AVX-NEXT: shrq $56, %rax
-; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: shrl $8, %eax
-; AVX-NEXT: vmovd %esi, %xmm2
-; AVX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: shrl $16, %eax
-; AVX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: shrl $24, %eax
-; AVX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX-NEXT: movq %rsi, %rax
-; AVX-NEXT: shrq $32, %rax
-; AVX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX-NEXT: movq %rsi, %rax
-; AVX-NEXT: shrq $40, %rax
-; AVX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX-NEXT: movq %rsi, %rax
-; AVX-NEXT: shrq $48, %rax
-; AVX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX-NEXT: vpextrq $1, %xmm0, %rax
-; AVX-NEXT: shrq $56, %rsi
-; AVX-NEXT: vpinsrb $7, %esi, %xmm2, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $8, %esi
-; AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $16, %esi
-; AVX-NEXT: vpinsrb $10, %esi, %xmm0, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $24, %esi
-; AVX-NEXT: vpinsrb $11, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $32, %rsi
-; AVX-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $40, %rsi
-; AVX-NEXT: vpinsrb $13, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $48, %rsi
-; AVX-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
-; AVX-NEXT: shrq $56, %rax
-; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX-NEXT: vpaddb 16(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rdx), %xmm1, %xmm1
; AVX-NEXT: vpaddb (%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovaps 16(%rdx), %xmm2
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: movq %rax, %rdi
-; AVX2-NEXT: movq %rax, %r8
-; AVX2-NEXT: movq %rax, %r9
-; AVX2-NEXT: movl %eax, %r10d
-; AVX2-NEXT: movl %eax, %r11d
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX2-NEXT: shrl $16, %r11d
-; AVX2-NEXT: vpinsrb $2, %r11d, %xmm1, %xmm1
-; AVX2-NEXT: shrl $24, %r10d
-; AVX2-NEXT: vpinsrb $3, %r10d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $32, %r9
-; AVX2-NEXT: vpinsrb $4, %r9d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $40, %r8
-; AVX2-NEXT: vpinsrb $5, %r8d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $48, %rdi
-; AVX2-NEXT: vpinsrb $6, %edi, %xmm1, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: shrq $56, %rsi
-; AVX2-NEXT: vpinsrb $7, %esi, %xmm1, %xmm1
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $8, %esi
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX2-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $16, %esi
-; AVX2-NEXT: vpinsrb $10, %esi, %xmm1, %xmm1
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $24, %esi
-; AVX2-NEXT: vpinsrb $11, %esi, %xmm1, %xmm1
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $32, %rsi
-; AVX2-NEXT: vpinsrb $12, %esi, %xmm1, %xmm1
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $40, %rsi
-; AVX2-NEXT: vpinsrb $13, %esi, %xmm1, %xmm1
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $48, %rsi
-; AVX2-NEXT: vpinsrb $14, %esi, %xmm1, %xmm1
-; AVX2-NEXT: vmovq %xmm0, %rsi
-; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX2-NEXT: movl %esi, %eax
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: vmovd %esi, %xmm2
-; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %esi, %eax
-; AVX2-NEXT: shrl $16, %eax
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %esi, %eax
-; AVX2-NEXT: shrl $24, %eax
-; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movq %rsi, %rax
-; AVX2-NEXT: shrq $32, %rax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movq %rsi, %rax
-; AVX2-NEXT: shrq $40, %rax
-; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movq %rsi, %rax
-; AVX2-NEXT: shrq $48, %rax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: shrq $56, %rsi
-; AVX2-NEXT: vpinsrb $7, %esi, %xmm2, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $8, %esi
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $16, %esi
-; AVX2-NEXT: vpinsrb $10, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $24, %esi
-; AVX2-NEXT: vpinsrb $11, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $32, %rsi
-; AVX2-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $40, %rsi
-; AVX2-NEXT: vpinsrb $13, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $48, %rsi
-; AVX2-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
-; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb (%rdx), %ymm1, %ymm1
-; AVX2-NEXT: vmovdqa %ymm1, (%rcx)
-; AVX2-NEXT: vmovdqa %ymm0, 32(%rcx)
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa %xmm0, %xmm0
+; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
+; AVX2-NEXT: vpaddb 32(%rdx), %ymm1, %ymm1
+; AVX2-NEXT: vmovdqa %ymm1, 32(%rcx)
+; AVX2-NEXT: vmovdqa %ymm0, (%rcx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: movq %rax, %rdi
-; AVX-NEXT: movq %rax, %r8
-; AVX-NEXT: movq %rax, %r9
-; AVX-NEXT: movl %eax, %r10d
-; AVX-NEXT: movl %eax, %r11d
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: shrl $8, %eax
-; AVX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX-NEXT: shrl $16, %r11d
-; AVX-NEXT: vpinsrb $2, %r11d, %xmm1, %xmm1
-; AVX-NEXT: shrl $24, %r10d
-; AVX-NEXT: vpinsrb $3, %r10d, %xmm1, %xmm1
-; AVX-NEXT: shrq $32, %r9
-; AVX-NEXT: vpinsrb $4, %r9d, %xmm1, %xmm1
-; AVX-NEXT: shrq $40, %r8
-; AVX-NEXT: vpinsrb $5, %r8d, %xmm1, %xmm1
-; AVX-NEXT: shrq $48, %rdi
-; AVX-NEXT: vpinsrb $6, %edi, %xmm1, %xmm1
-; AVX-NEXT: vpextrq $1, %xmm0, %rax
-; AVX-NEXT: shrq $56, %rsi
-; AVX-NEXT: vpinsrb $7, %esi, %xmm1, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $8, %esi
-; AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $16, %esi
-; AVX-NEXT: vpinsrb $10, %esi, %xmm0, %xmm0
-; AVX-NEXT: movl %eax, %esi
-; AVX-NEXT: shrl $24, %esi
-; AVX-NEXT: vpinsrb $11, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $32, %rsi
-; AVX-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $40, %rsi
-; AVX-NEXT: vpinsrb $13, %esi, %xmm0, %xmm0
-; AVX-NEXT: movq %rax, %rsi
-; AVX-NEXT: shrq $48, %rsi
-; AVX-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
-; AVX-NEXT: shrq $56, %rax
-; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT: vmovaps 32(%rdx), %ymm1
; AVX-NEXT: vpaddb (%rdx), %xmm0, %xmm0
; AVX-NEXT: vmovaps 16(%rdx), %xmm2
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vpaddb (%rsi), %xmm0, %xmm0
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: movq %rax, %rdi
-; AVX2-NEXT: movq %rax, %r8
-; AVX2-NEXT: movq %rax, %r9
-; AVX2-NEXT: movl %eax, %r10d
-; AVX2-NEXT: movl %eax, %r11d
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX2-NEXT: shrl $16, %r11d
-; AVX2-NEXT: vpinsrb $2, %r11d, %xmm1, %xmm1
-; AVX2-NEXT: shrl $24, %r10d
-; AVX2-NEXT: vpinsrb $3, %r10d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $32, %r9
-; AVX2-NEXT: vpinsrb $4, %r9d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $40, %r8
-; AVX2-NEXT: vpinsrb $5, %r8d, %xmm1, %xmm1
-; AVX2-NEXT: shrq $48, %rdi
-; AVX2-NEXT: vpinsrb $6, %edi, %xmm1, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: shrq $56, %rsi
-; AVX2-NEXT: vpinsrb $7, %esi, %xmm1, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $8, %esi
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX2-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $16, %esi
-; AVX2-NEXT: vpinsrb $10, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movl %eax, %esi
-; AVX2-NEXT: shrl $24, %esi
-; AVX2-NEXT: vpinsrb $11, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $32, %rsi
-; AVX2-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $40, %rsi
-; AVX2-NEXT: vpinsrb $13, %esi, %xmm0, %xmm0
-; AVX2-NEXT: movq %rax, %rsi
-; AVX2-NEXT: shrq $48, %rsi
-; AVX2-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
-; AVX2-NEXT: shrq $56, %rax
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: vmovaps 32(%rdx), %ymm1
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
; AVX2-NEXT: vmovaps %ymm1, 32(%rcx)