From b483349c5f4f61a67cfd9add21dbee2d69833b77 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 22 Oct 2022 11:35:37 +0100 Subject: [PATCH] [X86] Add v2i64/v8i16/v16i8 + AVX2 coverage to saturated shift tests To help better test the effects of D136478 --- llvm/test/CodeGen/X86/sshl_sat_vec.ll | 1352 ++++++++++++++++++++++++++++++++- llvm/test/CodeGen/X86/ushl_sat_vec.ll | 1057 +++++++++++++++++++++++++- 2 files changed, 2401 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll index 40f9025..bdae47e 100644 --- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll @@ -1,11 +1,179 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2 ; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86 -declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>) -define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { -; X64-LABEL: vec: +define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { +; X64-LABEL: vec_v2i64: +; X64: # %bb.0: +; X64-NEXT: movq %xmm0, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: testq %rax, %rax +; X64-NEXT: sets %dl +; X64-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF +; X64-NEXT: addq %rsi, %rdx +; X64-NEXT: movq %xmm1, %rcx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: shlq %cl, %rdi +; X64-NEXT: movq %rdi, %r8 +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: sarq %cl, %r8 +; X64-NEXT: cmpq %r8, %rax +; X64-NEXT: cmovneq %rdx, %rdi +; X64-NEXT: movq %rdi, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: testq %rax, %rax +; X64-NEXT: sets %dl +; X64-NEXT: addq %rsi, %rdx +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm0, %rcx +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: shlq %cl, %rsi +; X64-NEXT: movq %rsi, %rdi +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: sarq %cl, %rdi +; X64-NEXT: cmpq %rdi, %rax +; X64-NEXT: cmovneq %rdx, %rsi +; X64-NEXT: movq %rsi, %xmm0 +; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: retq +; +; X64-AVX2-LABEL: vec_v2i64: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpextrq $1, %xmm0, %rax +; X64-AVX2-NEXT: xorl %edx, %edx +; X64-AVX2-NEXT: testq %rax, %rax +; X64-AVX2-NEXT: sets %dl +; X64-AVX2-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF +; X64-AVX2-NEXT: addq %rsi, %rdx +; X64-AVX2-NEXT: vpextrq $1, %xmm1, %rcx +; X64-AVX2-NEXT: movq %rax, %rdi +; X64-AVX2-NEXT: shlq %cl, %rdi +; X64-AVX2-NEXT: movq %rdi, %r8 +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-AVX2-NEXT: sarq %cl, %r8 +; X64-AVX2-NEXT: cmpq %r8, %rax +; X64-AVX2-NEXT: cmovneq %rdx, %rdi +; X64-AVX2-NEXT: vmovq %rdi, %xmm2 +; X64-AVX2-NEXT: vmovq %xmm0, %rax +; X64-AVX2-NEXT: xorl %edx, %edx +; X64-AVX2-NEXT: testq %rax, %rax +; X64-AVX2-NEXT: sets %dl +; X64-AVX2-NEXT: addq %rsi, %rdx +; X64-AVX2-NEXT: vmovq %xmm1, %rcx +; X64-AVX2-NEXT: movq %rax, %rsi +; X64-AVX2-NEXT: shlq %cl, 
%rsi +; X64-AVX2-NEXT: movq %rsi, %rdi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-AVX2-NEXT: sarq %cl, %rdi +; X64-AVX2-NEXT: cmpq %rdi, %rax +; X64-AVX2-NEXT: cmovneq %rdx, %rsi +; X64-AVX2-NEXT: vmovq %rsi, %xmm0 +; X64-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X64-AVX2-NEXT: retq +; +; X86-LABEL: vec_v2i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel %eax, %esi +; X86-NEXT: cmovnel %edi, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: sarl %cl, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sarl $31, %eax +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovel %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movb %ch, %cl +; X86-NEXT: shll %cl, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: shldl %cl, %esi, %ebx +; X86-NEXT: testb $32, %ch +; X86-NEXT: cmovnel %eax, %ebx +; X86-NEXT: cmovnel %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: testb $32, %ch +; X86-NEXT: cmovel %edi, %esi +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: shrdl %cl, %edx, %eax +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movb %ch, %cl +; X86-NEXT: shrdl %cl, %ebx, %edx +; X86-NEXT: testb $32, %ch +; X86-NEXT: cmovnel %edi, %edx +; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: xorl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: xorl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: notl %edi +; X86-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx +; X86-NEXT: xorl %ebp, %esi +; X86-NEXT: sarl $31, %ebp +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: xorl $2147483647, %edi # imm = 0x7FFFFFFF +; X86-NEXT: orl %esi, %edx +; X86-NEXT: notl %ebp +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: cmovel %ebx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %ebp, 8(%eax) +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <2 x i64> 
@llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +} + +define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { +; X64-LABEL: vec_v4i32: ; X64: # %bb.0: ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] ; X64-NEXT: movd %xmm2, %eax @@ -75,7 +243,67 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movdqa %xmm2, %xmm0 ; X64-NEXT: retq ; -; X86-LABEL: vec: +; X64-AVX2-LABEL: vec_v4i32: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpextrd $1, %xmm0, %eax +; X64-AVX2-NEXT: vpextrd $1, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %edi, %edi +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sets %dil +; X64-AVX2-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF +; X64-AVX2-NEXT: cmpl %esi, %eax +; X64-AVX2-NEXT: cmovel %edx, %edi +; X64-AVX2-NEXT: vmovd %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-AVX2-NEXT: cmpl %esi, %eax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vmovd %ecx, %xmm2 +; X64-AVX2-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrd $2, %xmm0, %eax +; X64-AVX2-NEXT: vpextrd $2, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-AVX2-NEXT: cmpl %esi, %eax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrd $3, %xmm0, %eax +; X64-AVX2-NEXT: vpextrd $3, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testl %eax, %eax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-AVX2-NEXT: cmpl %esi, %eax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0 +; X64-AVX2-NEXT: retq +; +; X86-LABEL: vec_v4i32: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx @@ -144,3 +372,1119 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp } + +define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { +; X64-LABEL: vec_v8i16: +; X64: # %bb.0: +; X64-NEXT: pextrw $7, %xmm0, %eax +; X64-NEXT: pextrw $7, %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: pextrw $6, %xmm0, %eax +; X64-NEXT: pextrw $6, 
%xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm3 +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: pextrw $5, %xmm0, %eax +; X64-NEXT: pextrw $5, %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm4 +; X64-NEXT: pextrw $4, %xmm0, %eax +; X64-NEXT: pextrw $4, %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X64-NEXT: pextrw $3, %xmm0, %eax +; X64-NEXT: pextrw $3, %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm4 +; X64-NEXT: pextrw $2, %xmm0, %eax +; X64-NEXT: pextrw $2, %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm3 +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; X64-NEXT: pextrw $1, %xmm0, %eax +; X64-NEXT: pextrw $1, %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm4 +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movswl %dx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: sarl %cl, %esi +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testw %ax, %ax +; X64-NEXT: sets %cl +; X64-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm0 +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X64-NEXT: retq +; +; X64-AVX2-LABEL: vec_v8i16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpextrw $1, %xmm0, %edx +; X64-AVX2-NEXT: vpextrw $1, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movswl %si, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %edi +; X64-AVX2-NEXT: xorl %eax, %eax +; X64-AVX2-NEXT: testw %dx, %dx +; X64-AVX2-NEXT: sets %al +; X64-AVX2-NEXT: addl $32767, %eax # imm = 0x7FFF +; X64-AVX2-NEXT: cmpw %di, %dx +; X64-AVX2-NEXT: cmovel %esi, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %edx +; X64-AVX2-NEXT: vmovd %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movswl %si, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %edi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testw %dx, %dx +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-AVX2-NEXT: cmpw %di, %dx +; X64-AVX2-NEXT: cmovel %esi, %ecx +; X64-AVX2-NEXT: vmovd %ecx, %xmm2 +; X64-AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $2, %xmm0, %eax +; X64-AVX2-NEXT: vpextrw $2, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movswl %dx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testw %ax, %ax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-AVX2-NEXT: cmpw %si, %ax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $3, %xmm0, %eax +; X64-AVX2-NEXT: vpextrw $3, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movswl %dx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testw %ax, %ax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-AVX2-NEXT: cmpw %si, %ax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $4, %xmm0, %eax +; X64-AVX2-NEXT: vpextrw $4, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movswl %dx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testw %ax, %ax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-AVX2-NEXT: cmpw %si, %ax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $5, %xmm0, %eax +; X64-AVX2-NEXT: vpextrw $5, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movswl %dx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testw %ax, %ax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-AVX2-NEXT: cmpw %si, %ax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $6, %xmm0, %eax +; X64-AVX2-NEXT: vpextrw $6, %xmm1, %ecx +; 
X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movswl %dx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testw %ax, %ax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-AVX2-NEXT: cmpw %si, %ax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $7, %xmm0, %eax +; X64-AVX2-NEXT: vpextrw $7, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movswl %dx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarl %cl, %esi +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testw %ax, %ax +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X64-AVX2-NEXT: cmpw %si, %ax +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0 +; X64-AVX2-NEXT: retq +; +; X86-LABEL: vec_v8i16: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: movswl %bx, %ebp +; X86-NEXT: sarl %cl, %ebp +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testw %di, %di +; X86-NEXT: sets %cl +; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X86-NEXT: cmpw %bp, %di +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmovel %ebx, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movswl %di, %ebx +; X86-NEXT: sarl %cl, %ebx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testw %si, %si +; X86-NEXT: sets %al +; X86-NEXT: addl $32767, %eax # imm = 0x7FFF +; X86-NEXT: cmpw %bx, %si +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movswl %si, %edi +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testw %dx, %dx +; X86-NEXT: sets %al +; X86-NEXT: addl $32767, %eax # imm = 0x7FFF +; X86-NEXT: cmpw %di, %dx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movswl %dx, %esi +; X86-NEXT: sarl %cl, %esi +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: testw %ax, %ax +; X86-NEXT: sets %bl +; X86-NEXT: addl $32767, %ebx # imm = 0x7FFF +; X86-NEXT: cmpw %si, %ax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovel %edx, %ebx +; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movswl %dx, %esi +; X86-NEXT: sarl %cl, %esi +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testw %ax, %ax +; X86-NEXT: sets %cl +; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X86-NEXT: cmpw %si, %ax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmovel %edx, %ecx +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: movl %eax, %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movswl %dx, 
%esi +; X86-NEXT: sarl %cl, %esi +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: testw %ax, %ax +; X86-NEXT: sets %bl +; X86-NEXT: addl $32767, %ebx # imm = 0x7FFF +; X86-NEXT: cmpw %si, %ax +; X86-NEXT: cmovel %edx, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movswl %si, %edi +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testw %ax, %ax +; X86-NEXT: sets %dl +; X86-NEXT: addl $32767, %edx # imm = 0x7FFF +; X86-NEXT: cmpw %di, %ax +; X86-NEXT: cmovel %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movswl %si, %edi +; X86-NEXT: sarl %cl, %edi +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testw %ax, %ax +; X86-NEXT: sets %cl +; X86-NEXT: addl $32767, %ecx # imm = 0x7FFF +; X86-NEXT: cmpw %di, %ax +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movw %cx, 14(%eax) +; X86-NEXT: movw %dx, 12(%eax) +; X86-NEXT: movw %bx, 10(%eax) +; X86-NEXT: movw %bp, 8(%eax) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, 6(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, 2(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, (%eax) +; X86-NEXT: addl $16, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) + ret <8 x i16> %tmp +} + +define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { +; X64-LABEL: vec_v16i8: +; X64: # %bb.0: +; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm0 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm1 +; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; 
X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm0 +; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm1 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm3 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm1 +; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: 
cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm0 +; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm3 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm0 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: sarb %cl, %sil +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %al, %al +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %sil, %al +; X64-NEXT: cmovel %edx, %ecx +; X64-NEXT: movd %ecx, %xmm3 +; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shlb %cl, %dil +; X64-NEXT: movzbl %dil, %edi +; X64-NEXT: movl %edi, %r8d +; X64-NEXT: sarb %cl, %r8b +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testb %sil, %sil +; X64-NEXT: sets %cl +; X64-NEXT: addl $127, %ecx +; X64-NEXT: cmpb %r8b, %sil +; X64-NEXT: cmovel %edi, %ecx +; X64-NEXT: movd %ecx, %xmm4 +; X64-NEXT: movl %edx, %esi +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: sarb %cl, %dil +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testb %dl, %dl +; X64-NEXT: sets %al +; X64-NEXT: addl $127, %eax +; X64-NEXT: cmpb %dil, %dl +; X64-NEXT: cmovel %esi, %eax +; X64-NEXT: movd %eax, %xmm0 +; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: retq +; +; X64-AVX2-LABEL: vec_v16i8: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $1, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %eax +; X64-AVX2-NEXT: shlb %cl, %al +; X64-AVX2-NEXT: movzbl %al, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %dil +; X64-AVX2-NEXT: xorl %eax, %eax +; X64-AVX2-NEXT: testb %dl, %dl +; X64-AVX2-NEXT: sets %al +; X64-AVX2-NEXT: addl $127, %eax +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovel %esi, %eax +; X64-AVX2-NEXT: vmovd %xmm1, %ecx +; X64-AVX2-NEXT: vmovd %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %dil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %dl, %dl +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovel %esi, %ecx +; X64-AVX2-NEXT: vmovd %ecx, %xmm2 +; X64-AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $2, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $2, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $3, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $4, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $4, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $5, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $5, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $6, %xmm1, %ecx +; 
X64-AVX2-NEXT: vpextrb $6, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $7, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $7, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $8, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $9, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $10, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $10, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $11, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $12, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed 
$cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $13, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $14, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $15, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: sarb %cl, %sil +; X64-AVX2-NEXT: xorl %ecx, %ecx +; X64-AVX2-NEXT: testb %al, %al +; X64-AVX2-NEXT: sets %cl +; X64-AVX2-NEXT: addl $127, %ecx +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: cmovel %edx, %ecx +; X64-AVX2-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm0 +; X64-AVX2-NEXT: retq +; +; X86-LABEL: vec_v16i8: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $44, %esp +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movb %ch, %bh +; X86-NEXT: shlb %cl, %bh +; X86-NEXT: movzbl %bh, %esi +; X86-NEXT: sarb %cl, %bh +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %ch, %ch +; X86-NEXT: sets %al +; X86-NEXT: addl $127, %eax +; X86-NEXT: cmpb %bh, %ch +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: shlb %cl, %al +; X86-NEXT: movzbl %al, %esi +; X86-NEXT: sarb %cl, %al +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %bl, %bl +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %al, %bl +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movb %dh, %al +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shlb %cl, %al +; X86-NEXT: movzbl %al, %esi +; X86-NEXT: sarb %cl, %al +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %dh, %dh +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %al, %dh +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-NEXT: movb %ah, %al +; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %ecx +; X86-NEXT: shlb %cl, %al +; X86-NEXT: movzbl %al, %esi +; X86-NEXT: sarb %cl, %al +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb %ah, %ah +; X86-NEXT: sets %dl +; X86-NEXT: addl $127, %edx +; X86-NEXT: cmpb %al, %ah +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmovel %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; 
X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %esi +; X86-NEXT: sarb %cl, %dl +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %bl +; X86-NEXT: addl $127, %ebx +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovel %esi, %ebx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb %al, %ah +; X86-NEXT: shlb %cl, %ah +; X86-NEXT: movzbl %ah, %esi +; X86-NEXT: sarb %cl, %ah +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %dl +; X86-NEXT: addl $127, %edx +; X86-NEXT: cmpb %ah, %al +; X86-NEXT: cmovel %esi, %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb %al, %ah +; X86-NEXT: shlb %cl, %ah +; X86-NEXT: movzbl %ah, %esi +; X86-NEXT: sarb %cl, %ah +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testb %al, %al +; X86-NEXT: sets %cl +; X86-NEXT: addl $127, %ecx +; X86-NEXT: cmpb %ah, %al +; X86-NEXT: cmovel %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb %cl, 15(%eax) +; X86-NEXT: movb %dl, 14(%eax) +; X86-NEXT: movb %bl, 13(%eax) +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movb %cl, 12(%eax) +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movb %cl, 11(%eax) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 10(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 9(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 7(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 6(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 5(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 3(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 2(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 1(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, (%eax) +; X86-NEXT: addl $44, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y) + ret <16 x i8> %tmp +} diff --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll index 871e905..ad9fc85e 100644 --- a/llvm/test/CodeGen/X86/ushl_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll @@ -1,11 +1,151 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s 
--check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2 ; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86 -declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8>, <16 x i8>) -define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { -; X64-LABEL: vec: +define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { +; X64-LABEL: vec_v2i64: +; X64: # %bb.0: +; X64-NEXT: movq %xmm0, %rax +; X64-NEXT: movq %xmm1, %rcx +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: shlq %cl, %rdx +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: shrq %cl, %rsi +; X64-NEXT: cmpq %rsi, %rax +; X64-NEXT: movq $-1, %rax +; X64-NEXT: cmovneq %rax, %rdx +; X64-NEXT: movq %rdx, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, %rdx +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm0, %rcx +; X64-NEXT: movq %rdx, %rsi +; X64-NEXT: shlq %cl, %rsi +; X64-NEXT: movq %rsi, %rdi +; X64-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NEXT: shrq %cl, %rdi +; X64-NEXT: cmpq %rdi, %rdx +; X64-NEXT: cmovneq %rax, %rsi +; X64-NEXT: movq %rsi, %xmm0 +; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; X64-NEXT: movdqa %xmm2, %xmm0 +; X64-NEXT: retq +; +; X64-AVX2-LABEL: vec_v2i64: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpextrq $1, %xmm0, %rax +; X64-AVX2-NEXT: vpextrq $1, %xmm1, %rcx +; X64-AVX2-NEXT: movq %rax, %rdx +; X64-AVX2-NEXT: shlq %cl, %rdx +; X64-AVX2-NEXT: movq %rdx, %rsi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-AVX2-NEXT: shrq %cl, %rsi +; X64-AVX2-NEXT: cmpq %rsi, %rax +; X64-AVX2-NEXT: movq $-1, %rax +; X64-AVX2-NEXT: cmovneq %rax, %rdx +; X64-AVX2-NEXT: vmovq %rdx, %xmm2 +; X64-AVX2-NEXT: vmovq %xmm0, %rdx +; X64-AVX2-NEXT: vmovq %xmm1, %rcx +; X64-AVX2-NEXT: movq %rdx, %rsi +; X64-AVX2-NEXT: shlq %cl, %rsi +; X64-AVX2-NEXT: movq %rsi, %rdi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-AVX2-NEXT: shrq %cl, %rdi +; X64-AVX2-NEXT: cmpq %rdi, %rdx +; X64-AVX2-NEXT: cmovneq %rax, %rsi +; X64-AVX2-NEXT: vmovq %rsi, %xmm0 +; X64-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X64-AVX2-NEXT: retq +; +; X86-LABEL: vec_v2i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $16, %esp +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: movb %ch, %cl +; X86-NEXT: shll %cl, %eax +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: testb $32, %ch +; X86-NEXT: cmovnel %eax, %edx +; X86-NEXT: cmovnel %ebx, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testb $32, %ch +; X86-NEXT: cmovnel %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel %esi, %edx +; X86-NEXT: cmovnel %ebx, %esi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: shrl %cl, %edi +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovel %edi, %ebx +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movb %ch, %cl +; X86-NEXT: shrdl %cl, %ebp, %eax +; X86-NEXT: testb $32, %ch +; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shrdl %cl, %edx, %ebp +; X86-NEXT: testb $32, {{[0-9]+}}(%esp) +; X86-NEXT: cmovnel %edi, %ebp +; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-NEXT: cmovnel %ecx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: cmovnel %ecx, %eax +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl %ebp, %ebx +; X86-NEXT: cmovnel %ecx, %esi +; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %edx, 12(%ecx) +; X86-NEXT: movl %esi, 8(%ecx) +; X86-NEXT: movl %eax, 4(%ecx) +; X86-NEXT: movl %edi, (%ecx) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl $16, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %tmp +} + +define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { +; X64-LABEL: vec_v4i32: ; X64: # %bb.0: ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] ; X64-NEXT: movd %xmm2, %eax @@ -60,7 +200,52 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movdqa %xmm2, %xmm0 ; X64-NEXT: retq ; -; X86-LABEL: vec: +; X64-AVX2-LABEL: vec_v4i32: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpextrd $1, %xmm0, %eax +; X64-AVX2-NEXT: vpextrd $1, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %esi +; X64-AVX2-NEXT: cmpl %esi, %eax +; X64-AVX2-NEXT: movl $-1, %eax +; X64-AVX2-NEXT: cmovnel %eax, %edx +; X64-AVX2-NEXT: vmovd %xmm0, %esi +; X64-AVX2-NEXT: vmovd %xmm1, %ecx +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: shll %cl, %edi +; X64-AVX2-NEXT: movl %edi, %r8d +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %r8d +; X64-AVX2-NEXT: cmpl %r8d, %esi +; X64-AVX2-NEXT: cmovnel %eax, %edi +; X64-AVX2-NEXT: vmovd %edi, %xmm2 +; X64-AVX2-NEXT: vpinsrd $1, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrd $2, %xmm0, %edx +; X64-AVX2-NEXT: vpextrd $2, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %edi +; X64-AVX2-NEXT: cmpl %edi, %edx +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrd $2, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrd $3, %xmm0, %edx +; X64-AVX2-NEXT: vpextrd $3, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %edi +; X64-AVX2-NEXT: cmpl %edi, %edx +; 
X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrd $3, %esi, %xmm2, %xmm0 +; X64-AVX2-NEXT: retq +; +; X86-LABEL: vec_v4i32: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx @@ -114,3 +299,867 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { %tmp = call <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp } + +define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { +; X64-LABEL: vec_v8i16: +; X64: # %bb.0: +; X64-NEXT: pextrw $7, %xmm0, %eax +; X64-NEXT: pextrw $7, %xmm1, %ecx +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shll %cl, %edx +; X64-NEXT: movzwl %dx, %edx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %esi +; X64-NEXT: cmpw %si, %ax +; X64-NEXT: movl $65535, %eax # imm = 0xFFFF +; X64-NEXT: cmovnel %eax, %edx +; X64-NEXT: movd %edx, %xmm2 +; X64-NEXT: pextrw $6, %xmm0, %edx +; X64-NEXT: pextrw $6, %xmm1, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movzwl %si, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpw %di, %dx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: pextrw $5, %xmm0, %edx +; X64-NEXT: pextrw $5, %xmm1, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movzwl %si, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpw %di, %dx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm4 +; X64-NEXT: pextrw $4, %xmm0, %edx +; X64-NEXT: pextrw $4, %xmm1, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movzwl %si, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpw %di, %dx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] +; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; X64-NEXT: pextrw $3, %xmm0, %edx +; X64-NEXT: pextrw $3, %xmm1, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movzwl %si, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpw %di, %dx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm4 +; X64-NEXT: pextrw $2, %xmm0, %edx +; X64-NEXT: pextrw $2, %xmm1, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movzwl %si, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpw %di, %dx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; X64-NEXT: pextrw $1, %xmm0, %edx +; X64-NEXT: pextrw $1, %xmm1, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movzwl %si, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpw %di, %dx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm4 +; X64-NEXT: movd %xmm0, %edx +; X64-NEXT: movd %xmm1, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: shll %cl, %esi +; X64-NEXT: movzwl %si, %esi 
+; X64-NEXT: movl %esi, %edi +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: cmpw %di, %dx +; X64-NEXT: cmovnel %eax, %esi +; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X64-NEXT: retq +; +; X64-AVX2-LABEL: vec_v8i16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpextrw $1, %xmm0, %eax +; X64-AVX2-NEXT: vpextrw $1, %xmm1, %ecx +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shll %cl, %edx +; X64-AVX2-NEXT: movzwl %dx, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %esi +; X64-AVX2-NEXT: cmpw %si, %ax +; X64-AVX2-NEXT: movl $65535, %eax # imm = 0xFFFF +; X64-AVX2-NEXT: cmovnel %eax, %edx +; X64-AVX2-NEXT: vmovd %xmm0, %esi +; X64-AVX2-NEXT: vmovd %xmm1, %ecx +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: shll %cl, %edi +; X64-AVX2-NEXT: movzwl %di, %edi +; X64-AVX2-NEXT: movl %edi, %r8d +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %r8d +; X64-AVX2-NEXT: cmpw %r8w, %si +; X64-AVX2-NEXT: cmovnel %eax, %edi +; X64-AVX2-NEXT: vmovd %edi, %xmm2 +; X64-AVX2-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $2, %xmm0, %edx +; X64-AVX2-NEXT: vpextrw $2, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movzwl %si, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %edi +; X64-AVX2-NEXT: cmpw %di, %dx +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrw $2, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $3, %xmm0, %edx +; X64-AVX2-NEXT: vpextrw $3, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movzwl %si, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %edi +; X64-AVX2-NEXT: cmpw %di, %dx +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrw $3, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $4, %xmm0, %edx +; X64-AVX2-NEXT: vpextrw $4, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movzwl %si, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %edi +; X64-AVX2-NEXT: cmpw %di, %dx +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrw $4, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $5, %xmm0, %edx +; X64-AVX2-NEXT: vpextrw $5, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movzwl %si, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %edi +; X64-AVX2-NEXT: cmpw %di, %dx +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrw $5, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $6, %xmm0, %edx +; X64-AVX2-NEXT: vpextrw $6, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movzwl %si, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %edi +; X64-AVX2-NEXT: cmpw %di, %dx +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrw $6, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrw $7, 
%xmm0, %edx +; X64-AVX2-NEXT: vpextrw $7, %xmm1, %ecx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shll %cl, %esi +; X64-AVX2-NEXT: movzwl %si, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrl %cl, %edi +; X64-AVX2-NEXT: cmpw %di, %dx +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrw $7, %esi, %xmm2, %xmm0 +; X64-AVX2-NEXT: retq +; +; X86-LABEL: vec_v8i16: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl %eax, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movzwl %di, %ebx +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: shrl %cl, %edi +; X86-NEXT: cmpw %di, %ax +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: cmovnel %eax, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %edi +; X86-NEXT: movb %ch, %cl +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movzwl %di, %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: shrl %cl, %edi +; X86-NEXT: cmpw %di, %si +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $65535, %edi # imm = 0xFFFF +; X86-NEXT: cmovnel %edi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl %si, %eax +; X86-NEXT: movl %eax, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpw %si, %dx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovnel %edi, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl $65535, %esi # imm = 0xFFFF +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl %dx, %ebp +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: shrl %cl, %edx +; X86-NEXT: cmpw %dx, %ax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovnel %esi, %ebp +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl %si, %ebx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpw %si, %dx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmovnel %eax, %ebx +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: shll %cl, %esi +; X86-NEXT: movzwl %si, %edi +; X86-NEXT: movl %edi, %esi +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: cmpw %si, %dx +; X86-NEXT: cmovnel %eax, %edi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl %dx, %esi +; X86-NEXT: movl %esi, %edx +; X86-NEXT: shrl %cl, %edx +; X86-NEXT: cmpw %dx, %ax +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: cmovnel %eax, %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: shll %cl, %edx +; X86-NEXT: movzwl %dx, %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmpw %ax, %cx +; X86-NEXT: movl $65535, %eax # imm = 0xFFFF +; X86-NEXT: cmovnel %eax, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movw %dx, 14(%eax) +; X86-NEXT: movw %si, 12(%eax) +; 
X86-NEXT: movw %di, 10(%eax) +; X86-NEXT: movw %bx, 8(%eax) +; X86-NEXT: movw %bp, 6(%eax) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, 2(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movw %cx, (%eax) +; X86-NEXT: addl $12, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) + ret <8 x i16> %tmp +} + +define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { +; X64-LABEL: vec_v16i8: +; X64: # %bb.0: +; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %edx +; X64-NEXT: shlb %cl, %dl +; X64-NEXT: movzbl %dl, %esi +; X64-NEXT: movl %esi, %edx +; X64-NEXT: shrb %cl, %dl +; X64-NEXT: cmpb %dl, %al +; X64-NEXT: movl $255, %edx +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm1 +; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm1 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; 
X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm1 +; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movl %eax, %esi +; X64-NEXT: shlb %cl, %sil +; X64-NEXT: movzbl %sil, %esi +; X64-NEXT: movl %esi, %edi +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: cmpb %dil, %al +; X64-NEXT: cmovnel %edx, %esi +; X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi +; X64-NEXT: movl %edi, %r8d 
+; X64-NEXT: shlb %cl, %r8b +; X64-NEXT: movzbl %r8b, %r8d +; X64-NEXT: movl %r8d, %r9d +; X64-NEXT: shrb %cl, %r9b +; X64-NEXT: cmpb %r9b, %dil +; X64-NEXT: cmovnel %edx, %r8d +; X64-NEXT: movd %r8d, %xmm4 +; X64-NEXT: movl %esi, %edi +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: shlb %cl, %dil +; X64-NEXT: movzbl %dil, %edi +; X64-NEXT: movl %edi, %r8d +; X64-NEXT: shrb %cl, %r8b +; X64-NEXT: cmpb %r8b, %sil +; X64-NEXT: cmovnel %edx, %edi +; X64-NEXT: movd %edi, %xmm0 +; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: retq +; +; X64-AVX2-LABEL: vec_v16i8: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpextrb $1, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $1, %xmm0, %eax +; X64-AVX2-NEXT: movl %eax, %edx +; X64-AVX2-NEXT: shlb %cl, %dl +; X64-AVX2-NEXT: movzbl %dl, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %sil +; X64-AVX2-NEXT: cmpb %sil, %al +; X64-AVX2-NEXT: movl $255, %eax +; X64-AVX2-NEXT: cmovnel %eax, %edx +; X64-AVX2-NEXT: vmovd %xmm1, %ecx +; X64-AVX2-NEXT: vmovd %xmm0, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: shlb %cl, %dil +; X64-AVX2-NEXT: movzbl %dil, %edi +; X64-AVX2-NEXT: movl %edi, %r8d +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %r8b +; X64-AVX2-NEXT: cmpb %r8b, %sil +; X64-AVX2-NEXT: cmovnel %eax, %edi +; X64-AVX2-NEXT: vmovd %edi, %xmm2 +; X64-AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $2, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $2, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $2, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $3, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $3, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $3, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $4, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $4, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $5, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $5, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $5, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: 
vpextrb $6, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $6, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $6, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $7, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $7, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $7, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $8, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $8, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $8, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $9, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $9, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $9, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $10, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $10, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $10, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $11, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $11, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $11, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $12, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $12, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $12, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $13, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $13, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $14, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $14, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi 
+; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $14, %esi, %xmm2, %xmm2 +; X64-AVX2-NEXT: vpextrb $15, %xmm1, %ecx +; X64-AVX2-NEXT: vpextrb $15, %xmm0, %edx +; X64-AVX2-NEXT: movl %edx, %esi +; X64-AVX2-NEXT: shlb %cl, %sil +; X64-AVX2-NEXT: movzbl %sil, %esi +; X64-AVX2-NEXT: movl %esi, %edi +; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-AVX2-NEXT: shrb %cl, %dil +; X64-AVX2-NEXT: cmpb %dil, %dl +; X64-AVX2-NEXT: cmovnel %eax, %esi +; X64-AVX2-NEXT: vpinsrb $15, %esi, %xmm2, %xmm0 +; X64-AVX2-NEXT: retq +; +; X86-LABEL: vec_v16i8: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $48, %esp +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-NEXT: movb {{[0-9]+}}(%esp), %dh +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movb %bl, %bh +; X86-NEXT: shlb %cl, %bh +; X86-NEXT: movzbl %bh, %edi +; X86-NEXT: shrb %cl, %bh +; X86-NEXT: cmpb %bh, %bl +; X86-NEXT: movl $255, %esi +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movb %dh, %bl +; X86-NEXT: movb %ah, %cl +; X86-NEXT: shlb %cl, %bl +; X86-NEXT: movzbl %bl, %edi +; X86-NEXT: shrb %cl, %bl +; X86-NEXT: cmpb %bl, %dh +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movb %ch, %ah +; X86-NEXT: movb %dl, %cl +; X86-NEXT: shlb %cl, %ah +; X86-NEXT: movzbl %ah, %edi +; X86-NEXT: shrb %cl, %ah +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpb %ah, %ch +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movb %dl, %ah +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shlb %cl, %ah +; X86-NEXT: movzbl %ah, %edi +; X86-NEXT: shrb %cl, %ah +; X86-NEXT: cmpb %ah, %dl +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: 
movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %ebp +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %ebp +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %edi +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %edi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb %cl, %dl +; X86-NEXT: movzbl %dl, %ebx +; X86-NEXT: shrb %cl, %dl +; X86-NEXT: cmpb %dl, %al +; X86-NEXT: cmovnel %esi, %ebx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb %al, %ah +; X86-NEXT: shlb %cl, %ah +; X86-NEXT: movzbl %ah, %edx +; X86-NEXT: shrb %cl, %ah +; X86-NEXT: cmpb %ah, %al +; X86-NEXT: cmovnel %esi, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb %dl, 15(%eax) +; X86-NEXT: movb %bl, 14(%eax) +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movb %cl, 13(%eax) +; X86-NEXT: movl %ebp, %ecx +; X86-NEXT: movb %cl, 12(%eax) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 11(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 10(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 9(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 7(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 6(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 5(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 4(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 3(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 2(%eax) +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, 1(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movb %cl, (%eax) +; X86-NEXT: addl $48, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %tmp = call <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8> %x, <16 x i8> %y) + ret <16 x i8> %tmp +} -- 2.7.4
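
For readers skimming the generated checks: every lane in the ushl.sat output above follows the same scalarized pattern - shift left, truncate to the lane width, shift the result back right by the same amount, compare with the original operand, and select the lane's all-ones value on a mismatch. Below is a minimal per-lane C sketch of that semantics, assuming a 16-bit lane and an in-range shift amount (y < 16; per the LangRef, shift amounts equal to or larger than the bit width are poison for @llvm.ushl.sat). The function name ushl_sat_u16 is illustrative only and is not part of LLVM or of this patch.

    #include <stdint.h>

    /* Per-lane model of the pattern in the CHECK lines above: shift left,
     * truncate to the lane width, shift back, and compare with the original
     * value. Any mismatch means bits were shifted out, so the result
     * saturates to the lane's all-ones value (0xFFFF for i16, 0xFF for i8).
     * Assumes y < 16. */
    static uint16_t ushl_sat_u16(uint16_t x, uint16_t y) {
        uint16_t shifted = (uint16_t)(x << y);       /* shll %cl + movzwl */
        uint16_t back    = (uint16_t)(shifted >> y); /* shrl %cl          */
        return (back == x) ? shifted : 0xFFFFu;      /* cmpw  + cmovne    */
    }

The vector tests simply repeat this check per element, which is why both the SSE and AVX2 code paths above extract, scalarize, and reinsert every lane rather than using a single vector shift.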