From: Sanjay Patel Date: Tue, 24 Oct 2017 15:27:47 +0000 (+0000) Subject: [x86] add more vector ISA variants for memcmp expansion; NFC X-Git-Tag: llvmorg-6.0.0-rc1~5049 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f762c7b32f33f9fe984c229c9fe7f178ba657c95;p=platform%2Fupstream%2Fllvm.git [x86] add more vector ISA variants for memcmp expansion; NFC ...because every swiss cheese has different holes. llvm-svn: 316446 --- diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index a269529..b4d5148 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=cmov | FileCheck %s --check-prefix=X86 --check-prefix=X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefix=X86 --check-prefix=SSE --check-prefix=X86-SSE1 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=SSE --check-prefix=X86-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 @@ -584,6 +586,18 @@ define i1 @length16_eq(i8* %x, i8* %y) nounwind { ; X86-NOSSE-NEXT: setne %al ; X86-NOSSE-NEXT: retl ; +; X86-SSE1-LABEL: length16_eq: +; X86-SSE1: # BB#0: +; X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $16 +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: setne %al +; X86-SSE1-NEXT: retl +; ; X86-SSE2-LABEL: length16_eq: ; X86-SSE2: # BB#0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -630,6 +644,18 @@ define i1 @length16_eq_const(i8* %X) nounwind { ; X86-NOSSE-NEXT: sete %al ; X86-NOSSE-NEXT: retl ; +; X86-SSE1-LABEL: length16_eq_const: +; X86-SSE1: # BB#0: +; X86-SSE1-NEXT: pushl $0 +; X86-SSE1-NEXT: pushl $16 +; X86-SSE1-NEXT: pushl $.L.str +; X86-SSE1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE1-NEXT: calll memcmp +; X86-SSE1-NEXT: addl $16, %esp +; X86-SSE1-NEXT: testl %eax, %eax +; X86-SSE1-NEXT: sete %al +; X86-SSE1-NEXT: retl +; ; X86-SSE2-LABEL: length16_eq_const: ; X86-SSE2: # BB#0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -781,6 +807,22 @@ define i1 @length32_eq(i8* %x, i8* %y) nounwind { ; X64-SSE2-NEXT: popq %rcx ; X64-SSE2-NEXT: retq ; +; X64-AVX1-LABEL: length32_eq: +; X64-AVX1: # BB#0: +; X64-AVX1-NEXT: movq 16(%rdi), %rax +; X64-AVX1-NEXT: movq (%rdi), %rcx +; X64-AVX1-NEXT: movq 8(%rdi), %rdx +; X64-AVX1-NEXT: movq 24(%rdi), %rdi +; X64-AVX1-NEXT: xorq 24(%rsi), %rdi +; X64-AVX1-NEXT: xorq 8(%rsi), %rdx +; X64-AVX1-NEXT: orq %rdi, %rdx +; X64-AVX1-NEXT: xorq 16(%rsi), %rax +; X64-AVX1-NEXT: xorq (%rsi), %rcx +; X64-AVX1-NEXT: orq %rax, %rcx +; X64-AVX1-NEXT: orq %rdx, %rcx +; X64-AVX1-NEXT: sete %al +; X64-AVX1-NEXT: retq +; ; X64-AVX2-LABEL: length32_eq: ; X64-AVX2: # BB#0: ; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0 @@ -819,6 +861,22 @@ define i1 @length32_eq_const(i8* %X) nounwind { ; X64-SSE2-NEXT: popq %rcx ; X64-SSE2-NEXT: retq ; +; X64-AVX1-LABEL: length32_eq_const: +; X64-AVX1: # BB#0: +; X64-AVX1-NEXT: movabsq $3544395820347831604, %rax # imm = 0x3130393837363534 +; X64-AVX1-NEXT: xorq 24(%rdi), %rax +; X64-AVX1-NEXT: movabsq $3833745473465760056, %rcx # imm = 0x3534333231303938 +; X64-AVX1-NEXT: xorq 8(%rdi), %rcx +; X64-AVX1-NEXT: orq %rax, %rcx +; X64-AVX1-NEXT: movabsq $3689065127958034230, %rax # imm = 0x3332313039383736 +; X64-AVX1-NEXT: xorq 16(%rdi), %rax +; X64-AVX1-NEXT: movabsq $3978425819141910832, %rdx # imm = 0x3736353433323130 +; X64-AVX1-NEXT: xorq (%rdi), %rdx +; X64-AVX1-NEXT: orq %rax, %rdx +; X64-AVX1-NEXT: orq %rcx, %rdx +; X64-AVX1-NEXT: setne %al +; X64-AVX1-NEXT: retq +; ; X64-AVX2-LABEL: length32_eq_const: ; X64-AVX2: # BB#0: ; X64-AVX2-NEXT: vmovdqu (%rdi), %ymm0