From da730769fb7d41d5df53e79d928e902fb5a735f6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 11 Apr 2016 10:16:27 +0000
Subject: [PATCH] [X86] Added widening tests for and/xor/or bit operations

Part of additional tests requested for D18944

llvm-svn: 265925
---
 llvm/test/CodeGen/X86/widen_bitops-0.ll | 663 ++++++++++++++++++++++++++++++++
 1 file changed, 663 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/widen_bitops-0.ll

diff --git a/llvm/test/CodeGen/X86/widen_bitops-0.ll b/llvm/test/CodeGen/X86/widen_bitops-0.ll
new file mode 100644
index 0000000..0e720e7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/widen_bitops-0.ll
@@ -0,0 +1,663 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42
+
+define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind { ; both i24 arguments are reinterpreted as <3 x i8> and ANDed as vectors
+; X32-SSE-LABEL: and_i24_as_v3i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: subl $12, %esp
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pand %xmm0, %xmm1
+; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: and_i24_as_v3i8:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: movd %esi, %xmm0
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-SSE-NEXT: pand %xmm0, %xmm1
+; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: retq
+  %1 = bitcast i24 %a to <3 x i8>
+  %2 = bitcast i24 %b to <3 x i8>
+  %3 = and <3 x i8> %1, %2 ; operation under test, performed on the <3 x i8> type
+  %4 = bitcast <3 x i8> %3 to i24 ; result is reinterpreted back to the scalar i24 return type
+  ret i24 %4
+}
+
+define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind { ; both i24 arguments are reinterpreted as <3 x i8> and XORed as vectors
+; X32-SSE-LABEL: xor_i24_as_v3i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: subl $12, %esp
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pxor %xmm0, %xmm1
+; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: xor_i24_as_v3i8:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: movd %esi, %xmm0
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-SSE-NEXT: pxor %xmm0, %xmm1
+; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: retq
+  %1 = bitcast i24 %a to <3 x i8>
+  %2 = bitcast i24 %b to <3 x i8>
+  %3 = xor <3 x i8> %1, %2 ; operation under test, performed on the <3 x i8> type
+  %4 = bitcast <3 x i8> %3 to i24 ; result is reinterpreted back to the scalar i24 return type
+  ret i24 %4
+}
+
+define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind { ; both i24 arguments are reinterpreted as <3 x i8> and ORed as vectors
+; X32-SSE-LABEL: or_i24_as_v3i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: subl $12, %esp
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X32-SSE-NEXT: por %xmm0, %xmm1
+; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: addl $12, %esp
+; X32-SSE-NEXT: retl
+;
+; X64-SSE-LABEL: or_i24_as_v3i8:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: movd %esi, %xmm0
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; X64-SSE-NEXT: movd %edi, %xmm1
+; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; X64-SSE-NEXT: por %xmm0, %xmm1
+; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: retq
+  %1 = bitcast i24 %a to <3 x i8>
+  %2 = bitcast i24 %b to <3 x i8>
+  %3 = or <3 x i8> %1, %2 ; operation under test, performed on the <3 x i8> type
+  %4 = bitcast <3 x i8> %3 to i24 ; result is reinterpreted back to the scalar i24 return type
+  ret i24 %4
+}
+
+define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { ; both <8 x i3> arguments are reinterpreted as i24 and ANDed as scalars
+; X32-SSE-LABEL: and_v8i3_as_i24:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pushl %ebp
+; X32-SSE-NEXT: movl %esp, %ebp
+; X32-SSE-NEXT: andl $-8, %esp
+; X32-SSE-NEXT: subl $24, %esp
+; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movd %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: shll $16, %eax
+; X32-SSE-NEXT: movzwl (%esp), %ecx
+; X32-SSE-NEXT: orl %eax, %ecx
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X32-SSE-NEXT: shll $16, %edx
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl %edx, %eax
+; X32-SSE-NEXT: andl %ecx, %eax
+; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: shrl $16, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $3, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: movl %eax, %edx
+; X32-SSE-NEXT: andl $7, %edx
+; X32-SSE-NEXT: movd %edx, %xmm1
+; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $6, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $9, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $12, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
+; X32-SSE-NEXT: shrl $15, %eax
+; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
+; X32-SSE-NEXT: pxor %xmm0, %xmm0
+; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; X32-SSE-NEXT: movl %ebp, %esp
+; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: retl
+; NOTE(review): the i686 checks above store every %xmm1 lane to the same plain (%esp) byte; confirm that lowering is intended before regenerating these assertions
+; X64-SSE-LABEL: and_v8i3_as_i24:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE-NEXT: orl %eax, %ecx
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
+; X64-SSE-NEXT: orl %eax, %edx
+; X64-SSE-NEXT: andl %ecx, %edx
+; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: shrl $16, %edx
+; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $3, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: movl %eax, %edx
+; X64-SSE-NEXT: andl $7, %edx
+; X64-SSE-NEXT: movd %edx, %xmm0
+; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $6, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $9, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $12, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
+; X64-SSE-NEXT: shrl $15, %eax
+; X64-SSE-NEXT: movzwl %ax, %eax
+; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
+; X64-SSE-NEXT: xorl %eax, %eax
+; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
+; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: retq
+  %1 = bitcast <8 x i3> %a to i24
+  %2 = bitcast <8 x i3> %b to i24
+  %3 = and i24 %1, %2 ; operation under test, performed on the scalar i24 type
+  %4 = bitcast i24 %3 to <8 x i3> ; result is reinterpreted back to the <8 x i3> return type
+  ret <8 x i3> %4
+}
+
+define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { ; both <8 x i3> arguments are reinterpreted as i24 and XORed as scalars
+; X32-SSE-LABEL: xor_v8i3_as_i24:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pushl %ebp
+; X32-SSE-NEXT: movl %esp, %ebp
+; X32-SSE-NEXT: andl $-8, %esp
+; X32-SSE-NEXT: subl $24, %esp
+; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movd %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: shll $16, %eax
+; X32-SSE-NEXT: movzwl (%esp), %ecx
+; X32-SSE-NEXT: orl %eax, %ecx
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X32-SSE-NEXT: shll $16, %edx
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl %edx, %eax
+; X32-SSE-NEXT: xorl %ecx, %eax
+; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: shrl $16, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $3, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: movl %eax, %edx
+; X32-SSE-NEXT: andl $7, %edx
+; X32-SSE-NEXT: movd %edx, %xmm1
+; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $6, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $9, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $12, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
+; X32-SSE-NEXT: shrl $15, %eax
+; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
+; X32-SSE-NEXT: pxor %xmm0, %xmm0
+; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; X32-SSE-NEXT: movl %ebp, %esp
+; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: retl
+; NOTE(review): the i686 checks above store every %xmm1 lane to the same plain (%esp) byte; confirm that lowering is intended before regenerating these assertions
+; X64-SSE-LABEL: xor_v8i3_as_i24:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE-NEXT: orl %eax, %ecx
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
+; X64-SSE-NEXT: orl %eax, %edx
+; X64-SSE-NEXT: xorl %ecx, %edx
+; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: shrl $16, %edx
+; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $3, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: movl %eax, %edx
+; X64-SSE-NEXT: andl $7, %edx
+; X64-SSE-NEXT: movd %edx, %xmm0
+; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $6, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $9, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $12, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
+; X64-SSE-NEXT: shrl $15, %eax
+; X64-SSE-NEXT: movzwl %ax, %eax
+; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
+; X64-SSE-NEXT: xorl %eax, %eax
+; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
+; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: retq
+  %1 = bitcast <8 x i3> %a to i24
+  %2 = bitcast <8 x i3> %b to i24
+  %3 = xor i24 %1, %2 ; operation under test, performed on the scalar i24 type
+  %4 = bitcast i24 %3 to <8 x i3> ; result is reinterpreted back to the <8 x i3> return type
+  ret <8 x i3> %4
+}
+
+define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { ; both <8 x i3> arguments are reinterpreted as i24 and ORed as scalars
+; X32-SSE-LABEL: or_v8i3_as_i24:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pushl %ebp
+; X32-SSE-NEXT: movl %esp, %ebp
+; X32-SSE-NEXT: andl $-8, %esp
+; X32-SSE-NEXT: subl $24, %esp
+; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, (%esp)
+; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movd %xmm0, %eax
+; X32-SSE-NEXT: andl $15, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: shll $16, %eax
+; X32-SSE-NEXT: movzwl (%esp), %ecx
+; X32-SSE-NEXT: orl %eax, %ecx
+; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X32-SSE-NEXT: shll $16, %edx
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: orl %edx, %eax
+; X32-SSE-NEXT: orl %ecx, %eax
+; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: shrl $16, %eax
+; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $3, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: movl %eax, %edx
+; X32-SSE-NEXT: andl $7, %edx
+; X32-SSE-NEXT: movd %edx, %xmm1
+; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $6, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $9, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
+; X32-SSE-NEXT: movl %eax, %ecx
+; X32-SSE-NEXT: shrl $12, %ecx
+; X32-SSE-NEXT: andl $7, %ecx
+; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
+; X32-SSE-NEXT: shrl $15, %eax
+; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
+; X32-SSE-NEXT: pxor %xmm0, %xmm0
+; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; X32-SSE-NEXT: movl %ebp, %esp
+; X32-SSE-NEXT: popl %ebp
+; X32-SSE-NEXT: retl
+; NOTE(review): the i686 checks above store every %xmm1 lane to the same plain (%esp) byte; confirm that lowering is intended before regenerating these assertions
+; X64-SSE-LABEL: or_v8i3_as_i24:
+; X64-SSE: # BB#0:
+; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm1, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movd %xmm0, %eax
+; X64-SSE-NEXT: andl $15, %eax
+; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
+; X64-SSE-NEXT: orl %eax, %ecx
+; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: shll $16, %eax
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
+; X64-SSE-NEXT: orl %eax, %edx
+; X64-SSE-NEXT: orl %ecx, %edx
+; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: shrl $16, %edx
+; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
+; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $3, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: movl %eax, %edx
+; X64-SSE-NEXT: andl $7, %edx
+; X64-SSE-NEXT: movd %edx, %xmm0
+; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $6, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $9, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
+; X64-SSE-NEXT: movl %eax, %ecx
+; X64-SSE-NEXT: shrl $12, %ecx
+; X64-SSE-NEXT: andl $7, %ecx
+; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
+; X64-SSE-NEXT: shrl $15, %eax
+; X64-SSE-NEXT: movzwl %ax, %eax
+; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
+; X64-SSE-NEXT: xorl %eax, %eax
+; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
+; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
+; X64-SSE-NEXT: retq
+  %1 = bitcast <8 x i3> %a to i24
+  %2 = bitcast <8 x i3> %b to i24
+  %3 = or i24 %1, %2 ; operation under test, performed on the scalar i24 type
+  %4 = bitcast i24 %3 to <8 x i3> ; result is reinterpreted back to the <8 x i3> return type
+  ret <8 x i3> %4
+}
-- 
2.7.4