From e9f82e0d1d70f361a40f1853c928df04918a38f5 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 14 Aug 2010 22:04:01 -0700 Subject: [PATCH] Add optimized strncasecmp versions for x86-64. --- ChangeLog | 17 + NEWS | 5 +- string/Makefile | 2 +- string/test-strncasecmp.c | 318 +++++++++++++++++ sysdeps/x86_64/Makefile | 2 +- sysdeps/x86_64/multiarch/Makefile | 3 +- sysdeps/x86_64/multiarch/strcmp.S | 514 ++++++++++++++++------------ sysdeps/x86_64/multiarch/strncase_l-ssse3.S | 6 + sysdeps/x86_64/multiarch/strncase_l.S | 6 + sysdeps/x86_64/strcasecmp_l-nonascii.c | 3 + sysdeps/x86_64/strcmp.S | 169 +++++---- sysdeps/x86_64/strncase.S | 1 + sysdeps/x86_64/strncase_l-nonascii.c | 8 + sysdeps/x86_64/strncase_l.S | 6 + 14 files changed, 774 insertions(+), 286 deletions(-) create mode 100644 string/test-strncasecmp.c create mode 100644 sysdeps/x86_64/multiarch/strncase_l-ssse3.S create mode 100644 sysdeps/x86_64/multiarch/strncase_l.S create mode 100644 sysdeps/x86_64/strncase.S create mode 100644 sysdeps/x86_64/strncase_l-nonascii.c create mode 100644 sysdeps/x86_64/strncase_l.S diff --git a/ChangeLog b/ChangeLog index 21f9ed7..dd78abe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,22 @@ 2010-08-14 Ulrich Drepper + * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add + strncase_l-nonascii. + * sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines): + Add strncase_l-ssse3. + * sysdeps/x86_64/multiarch/strcmp.S: Prepare for use as strncasecmp. + * sysdeps/x86_64/strcmp.S: Likewise. + * sysdeps/x86_64/multiarch/strncase_l-ssse3.S: New file. + * sysdeps/x86_64/multiarch/strncase_l.S: New file. + * sysdeps/x86_64/strncase.S: New file. + * sysdeps/x86_64/strncase_l-nonascii.c: New file. + * sysdeps/x86_64/strncase_l.S: New file. + * string/Makefile (strop-tests): Add strncasecmp. + * string/test-strncasecmp.c: New file. + + * sysdeps/x86_64/strcasecmp_l-nonascii.c: Add prototype to avoid + warning. 
+ * sysdeps/x86_64/strcmp.S: Move definition of NO_NOLOCALE_ALIAS to... * sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S: ... here. diff --git a/NEWS b/NEWS index eba00f4..f3094d1 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU C Library NEWS -- history of user-visible changes. 2010-8-12 +GNU C Library NEWS -- history of user-visible changes. 2010-8-14 Copyright (C) 1992-2009, 2010 Free Software Foundation, Inc. See the end for copying conditions. @@ -15,7 +15,8 @@ Version 2.13 * POWER7 optimizations: memset, memcmp, strncmp -* New optimized string functions for x86-64: strnlen, strcasecmp +* New optimized string functions for x86-64: strnlen (SSE2), + strcasecmp (SSE2, SSSE3, SSE4.2), strncasecmp (SSE2, SSSE3, SSE4.2) Implemented by Ulrich Drepper. Version 2.12 diff --git a/string/Makefile b/string/Makefile index 4c160e9..cc2da10 100644 --- a/string/Makefile +++ b/string/Makefile @@ -49,7 +49,7 @@ o-objects.ob := memcpy.o memset.o memchr.o strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \ stpcpy stpncpy strcat strchr strcmp strcpy strcspn \ strlen strncmp strncpy strpbrk strrchr strspn memmem \ - strstr strcasestr strnlen strcasecmp + strstr strcasestr strnlen strcasecmp strncasecmp tests := tester inl-tester noinl-tester testcopy test-ffs \ tst-strlen stratcliff tst-svc tst-inlcall \ bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \ diff --git a/string/test-strncasecmp.c b/string/test-strncasecmp.c new file mode 100644 index 0000000..80e4d63 --- /dev/null +++ b/string/test-strncasecmp.c @@ -0,0 +1,318 @@ +/* Test and measure strncasecmp functions. + Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Jakub Jelinek , 1999. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <ctype.h> +#define TEST_MAIN +#include "test-string.h" + +typedef int (*proto_t) (const char *, const char *, size_t); +static int simple_strncasecmp (const char *, const char *, size_t); +static int stupid_strncasecmp (const char *, const char *, size_t); + +IMPL (stupid_strncasecmp, 0) +IMPL (simple_strncasecmp, 0) +IMPL (strncasecmp, 1) + +static int +simple_strncasecmp (const char *s1, const char *s2, size_t n) +{ + int ret; + + if (n == 0) + return 0; + + while ((ret = ((unsigned char) tolower (*s1) + - (unsigned char) tolower (*s2))) == 0 + && *s1++) + { + if (--n == 0) + return 0; + ++s2; + } + return ret; +} + +static int +stupid_strncasecmp (const char *s1, const char *s2, size_t max) +{ + size_t ns1 = strlen (s1) + 1; + size_t ns2 = strlen (s2) + 1; + size_t n = ns1 < ns2 ? 
ns1 : ns2; + if (n > max) + n = max; + int ret = 0; + + while (n--) + { + if ((ret = ((unsigned char) tolower (*s1) + - (unsigned char) tolower (*s2))) != 0) + break; + ++s1; + ++s2; + } + return ret; +} + +static void +do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n, + int exp_result) +{ + int result = CALL (impl, s1, s2, n); + if ((exp_result == 0 && result != 0) + || (exp_result < 0 && result >= 0) + || (exp_result > 0 && result <= 0)) + { + error (0, 0, "Wrong result in function %s %d %d", impl->name, + result, exp_result); + ret = 1; + return; + } + + if (HP_TIMING_AVAIL) + { + hp_timing_t start __attribute ((unused)); + hp_timing_t stop __attribute ((unused)); + hp_timing_t best_time = ~ (hp_timing_t) 0; + size_t i; + + for (i = 0; i < 32; ++i) + { + HP_TIMING_NOW (start); + CALL (impl, s1, s2, n); + HP_TIMING_NOW (stop); + HP_TIMING_BEST (best_time, start, stop); + } + + printf ("\t%zd", (size_t) best_time); + } +} + +static void +do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char, + int exp_result) +{ + size_t i; + char *s1, *s2; + + if (len == 0) + return; + + align1 &= 7; + if (align1 + len + 1 >= page_size) + return; + + align2 &= 7; + if (align2 + len + 1 >= page_size) + return; + + s1 = (char *) (buf1 + align1); + s2 = (char *) (buf2 + align2); + + for (i = 0; i < len; i++) + { + s1[i] = toupper (1 + 23 * i % max_char); + s2[i] = tolower (s1[i]); + } + + s1[len] = s2[len] = 0; + s1[len + 1] = 23; + s2[len + 1] = 24 + exp_result; + if ((s2[len - 1] == 'z' && exp_result == -1) + || (s2[len - 1] == 'a' && exp_result == 1)) + s1[len - 1] += exp_result; + else + s2[len - 1] -= exp_result; + + if (HP_TIMING_AVAIL) + printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2); + + FOR_EACH_IMPL (impl, 0) + do_one_test (impl, s1, s2, n, exp_result); + + if (HP_TIMING_AVAIL) + putchar ('\n'); +} + +static void +do_random_tests (void) +{ + size_t i, j, n, align1, align2, pos, len1, len2; + int result; + long r; + 
unsigned char *p1 = buf1 + page_size - 512; + unsigned char *p2 = buf2 + page_size - 512; + + for (n = 0; n < ITERATIONS; n++) + { + align1 = random () & 31; + if (random () & 1) + align2 = random () & 31; + else + align2 = align1 + (random () & 24); + pos = random () & 511; + j = align1 > align2 ? align1 : align2; + if (pos + j >= 511) + pos = 510 - j - (random () & 7); + len1 = random () & 511; + if (pos >= len1 && (random () & 1)) + len1 = pos + (random () & 7); + if (len1 + j >= 512) + len1 = 511 - j - (random () & 7); + if (pos >= len1) + len2 = len1; + else + len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0); + j = (pos > len2 ? pos : len2) + align1 + 64; + if (j > 512) + j = 512; + for (i = 0; i < j; ++i) + { + p1[i] = tolower (random () & 255); + if (i < len1 + align1 && !p1[i]) + { + p1[i] = tolower (random () & 255); + if (!p1[i]) + p1[i] = tolower (1 + (random () & 127)); + } + } + for (i = 0; i < j; ++i) + { + p2[i] = toupper (random () & 255); + if (i < len2 + align2 && !p2[i]) + { + p2[i] = toupper (random () & 255); + if (!p2[i]) + p2[i] = toupper (1 + (random () & 127)); + } + } + + result = 0; + memcpy (p2 + align2, p1 + align1, pos); + if (pos < len1) + { + if (tolower (p2[align2 + pos]) == p1[align1 + pos]) + { + p2[align2 + pos] = toupper (random () & 255); + if (tolower (p2[align2 + pos]) == p1[align1 + pos]) + p2[align2 + pos] = toupper (p1[align1 + pos] + + 3 + (random () & 127)); + } + + if (p1[align1 + pos] < tolower (p2[align2 + pos])) + result = -1; + else + result = 1; + } + p1[len1 + align1] = 0; + p2[len2 + align2] = 0; + + FOR_EACH_IMPL (impl, 1) + { + r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2), + pos + 1 + (random () & 255)); + /* Test whether on 64-bit architectures where ABI requires + callee to promote has the promotion been done. 
*/ + asm ("" : "=g" (r) : "0" (r)); + if ((r == 0 && result) + || (r < 0 && result >= 0) + || (r > 0 && result <= 0)) + { + error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p", + n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2); + ret = 1; + } + } + } +} + +int +test_main (void) +{ + size_t i; + + test_init (); + + printf ("%23s", ""); + FOR_EACH_IMPL (impl, 0) + printf ("\t%s", impl->name); + putchar ('\n'); + + for (i = 1; i < 16; ++i) + { + do_test (i, i, i - 1, i, 127, 0); + + do_test (i, i, i, i, 127, 0); + do_test (i, i, i, i, 127, 1); + do_test (i, i, i, i, 127, -1); + + do_test (i, i, i + 1, i, 127, 0); + do_test (i, i, i + 1, i, 127, 1); + do_test (i, i, i + 1, i, 127, -1); + } + + for (i = 1; i < 10; ++i) + { + do_test (0, 0, (2 << i) - 1, 2 << i, 127, 0); + do_test (0, 0, 2 << i, 2 << i, 254, 0); + do_test (0, 0, (2 << i) + 1, 2 << i, 127, 0); + + do_test (0, 0, (2 << i) + 1, 2 << i, 254, 0); + + do_test (0, 0, 2 << i, 2 << i, 127, 1); + do_test (0, 0, (2 << i) + 10, 2 << i, 127, 1); + + do_test (0, 0, 2 << i, 2 << i, 254, 1); + do_test (0, 0, (2 << i) + 10, 2 << i, 254, 1); + + do_test (0, 0, 2 << i, 2 << i, 127, -1); + do_test (0, 0, (2 << i) + 10, 2 << i, 127, -1); + + do_test (0, 0, 2 << i, 2 << i, 254, -1); + do_test (0, 0, (2 << i) + 10, 2 << i, 254, -1); + } + + for (i = 1; i < 8; ++i) + { + do_test (i, 2 * i, (8 << i) - 1, 8 << i, 127, 0); + do_test (i, 2 * i, 8 << i, 8 << i, 127, 0); + do_test (i, 2 * i, (8 << i) + 100, 8 << i, 127, 0); + + do_test (2 * i, i, (8 << i) - 1, 8 << i, 254, 0); + do_test (2 * i, i, 8 << i, 8 << i, 254, 0); + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, 0); + + do_test (i, 2 * i, 8 << i, 8 << i, 127, 1); + do_test (i, 2 * i, (8 << i) + 100, 8 << i, 127, 1); + + do_test (2 * i, i, 8 << i, 8 << i, 254, 1); + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, 1); + + do_test (i, 2 * i, 8 << i, 8 << i, 127, -1); + do_test (i, 2 * i, (8 << i) + 100, 
8 << i, 127, -1); + + do_test (2 * i, i, 8 << i, 8 << i, 254, -1); + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, -1); + } + + do_random_tests (); + return ret; +} + +#include "../test-skeleton.c" diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index f7eeb15..b989f6a 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -12,7 +12,7 @@ sysdep_routines += _mcount endif ifeq ($(subdir),string) -sysdep_routines += cacheinfo strcasecmp_l-nonascii +sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii gen-as-const-headers += locale-defines.sym endif diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 5113dc1..b124524 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -7,7 +7,8 @@ ifeq ($(subdir),string) sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \ - memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 + memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \ + strncase_l-ssse3 ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c CFLAGS-strcspn-c.c += -msse4 diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S index 3726dbe..764eb09 100644 --- a/sysdeps/x86_64/multiarch/strcmp.S +++ b/sysdeps/x86_64/multiarch/strcmp.S @@ -46,6 +46,24 @@ # define STRCMP_SSSE3 __strcasecmp_l_ssse3 # define STRCMP_SSE2 __strcasecmp_l_sse2 # define __GI_STRCMP __GI___strcasecmp_l +#elif defined USE_AS_STRNCASECMP_L +# include "locale-defines.h" + +/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz + if the new counter > the old one or is 0. 
*/ +# define UPDATE_STRNCMP_COUNTER \ + /* calculate left number to compare */ \ + lea -16(%rcx, %r11), %r9; \ + cmp %r9, %r11; \ + jb LABEL(strcmp_exitz_sse4_2); \ + test %r9, %r9; \ + je LABEL(strcmp_exitz_sse4_2); \ + mov %r9, %r11 + +# define STRCMP_SSE42 __strncasecmp_l_sse42 +# define STRCMP_SSSE3 __strncasecmp_l_ssse3 +# define STRCMP_SSE2 __strncasecmp_l_sse2 +# define __GI_STRCMP __GI___strncasecmp_l #else # define UPDATE_STRNCMP_COUNTER # ifndef STRCMP @@ -100,6 +118,24 @@ ENTRY(__strcasecmp) END(__strcasecmp) weak_alias (__strcasecmp, strcasecmp) # endif +# ifdef USE_AS_STRNCASECMP_L +ENTRY(__strncasecmp) + .type __strncasecmp, @gnu_indirect_function + cmpl $0, __cpu_features+KIND_OFFSET(%rip) + jne 1f + call __init_cpu_features +1: + leaq __strncasecmp_sse42(%rip), %rax + testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) + jnz 2f + leaq __strncasecmp_ssse3(%rip), %rax + testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) + jnz 2f + leaq __strncasecmp_sse2(%rip), %rax +2: ret +END(__strncasecmp) +weak_alias (__strncasecmp, strncasecmp) +# endif /* We use 0x1a: _SIDD_SBYTE_OPS @@ -131,15 +167,28 @@ weak_alias (__strcasecmp, strcasecmp) .section .text.sse4.2,"ax",@progbits .align 16 .type STRCMP_SSE42, @function -#ifdef USE_AS_STRCASECMP_L - /* 5-byte NOP. */ - .byte 0x0f,0x1f,0x44,0x00,0x00 +# ifdef USE_AS_STRCASECMP_L ENTRY (__strcasecmp_sse42) movq __libc_tsd_LOCALE@gottpoff(%rip),%rax movq %fs:(%rax),%rdx + + // XXX 5 byte should be before the function + /* 5-byte NOP. */ + .byte 0x0f,0x1f,0x44,0x00,0x00 END (__strcasecmp_sse42) /* FALLTHROUGH to strcasecmp_l. */ -#endif +# endif +# ifdef USE_AS_STRNCASECMP_L +ENTRY (__strncasecmp_sse42) + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax + movq %fs:(%rax),%rcx + + // XXX 5 byte should be before the function + /* 5-byte NOP. */ + .byte 0x0f,0x1f,0x44,0x00,0x00 +END (__strncasecmp_sse42) + /* FALLTHROUGH to strncasecmp_l. 
*/ +# endif STRCMP_SSE42: cfi_startproc @@ -148,31 +197,42 @@ STRCMP_SSE42: /* * This implementation uses SSE to compare up to 16 bytes at a time. */ -#ifdef USE_AS_STRCASECMP_L +# ifdef USE_AS_STRCASECMP_L /* We have to fall back on the C implementation for locales with encodings not matching ASCII for single bytes. */ -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax -# else +# else movq (%rdx), %rax -# endif +# endif testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strcasecmp_l_nonascii -#endif +# endif +# ifdef USE_AS_STRNCASECMP_L + /* We have to fall back on the C implementation for locales + with encodings not matching ASCII for single bytes (locale is the fourth argument, %rcx). */ +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rcx), %rax +# else + movq (%rcx), %rax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) + jne __strncasecmp_l_nonascii +# endif -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L test %rdx, %rdx je LABEL(strcmp_exitz_sse4_2) cmp $1, %rdx je LABEL(Byte0_sse4_2) mov %rdx, %r11 -#endif +# endif mov %esi, %ecx mov %edi, %eax /* Use 64bit AND here to avoid long NOP padding. 
*/ and $0x3f, %rcx /* rsi alignment in cache line */ and $0x3f, %rax /* rdi alignment in cache line */ -#ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L .section .rodata.cst16,"aM",@progbits,16 .align 16 .Lbelowupper_sse4: @@ -186,19 +246,19 @@ STRCMP_SSE42: .quad 0x2020202020202020 .previous movdqa .Lbelowupper_sse4(%rip), %xmm4 -# define UCLOW_reg %xmm4 +# define UCLOW_reg %xmm4 movdqa .Ltopupper_sse4(%rip), %xmm5 -# define UCHIGH_reg %xmm5 +# define UCHIGH_reg %xmm5 movdqa .Ltouppermask_sse4(%rip), %xmm6 -# define LCQWORD_reg %xmm6 -#endif +# define LCQWORD_reg %xmm6 +# endif cmp $0x30, %ecx ja LABEL(crosscache_sse4_2)/* rsi: 16-byte load will cross cache line */ cmp $0x30, %eax ja LABEL(crosscache_sse4_2)/* rdi: 16-byte load will cross cache line */ movdqu (%rdi), %xmm1 movdqu (%rsi), %xmm2 -# ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L # define TOLOWER(reg1, reg2) \ movdqa reg1, %xmm7; \ movdqa UCHIGH_reg, %xmm8; \ @@ -225,10 +285,10 @@ STRCMP_SSE42: pmovmskb %xmm1, %edx sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ jnz LABEL(less16bytes_sse4_2)/* If not, find different value or null char */ -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2)/* finish comparision */ -#endif +# endif add $16, %rsi /* prepare to search next 16 bytes */ add $16, %rdi /* prepare to search next 16 bytes */ @@ -270,13 +330,13 @@ LABEL(ashr_0_sse4_2): movdqa (%rsi), %xmm1 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ pcmpeqb %xmm1, %xmm0 /* Any null chars? 
*/ -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ -#else +# else movdqa (%rdi), %xmm2 TOLOWER (%xmm1, %xmm2) pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ -#endif +# endif psubb %xmm0, %xmm1 /* packed sub of comparison results*/ pmovmskb %xmm1, %r9d shr %cl, %edx /* adjust 0xffff for offset */ @@ -300,48 +360,48 @@ LABEL(ashr_0_sse4_2): .p2align 4 LABEL(ashr_0_use_sse4_2): movdqa (%rdi,%rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif lea 16(%rdx), %rdx jbe LABEL(ashr_0_use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif movdqa (%rdi,%rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif lea 16(%rdx), %rdx jbe LABEL(ashr_0_use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif jmp LABEL(ashr_0_use_sse4_2) .p2align 4 LABEL(ashr_0_use_sse4_2_exit): jnc LABEL(strcmp_exitz_sse4_2) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub %rcx, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif lea -16(%rdx, %rcx), %rcx movzbl (%rdi, %rcx), %eax movzbl (%rsi, %rcx), %edx -# ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx movl (%rcx,%rax,4), %eax movl (%rcx,%rdx,4), %edx @@ -394,18 +454,18 @@ 
LABEL(loop_ashr_1_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $1, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -413,18 +473,18 @@ LABEL(loop_ashr_1_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $1, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_1_use_sse4_2) @@ -434,10 +494,10 @@ LABEL(nibble_ashr_1_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $1, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $14, %ecx ja LABEL(loop_ashr_1_use_sse4_2) @@ -486,18 +546,18 @@ LABEL(loop_ashr_2_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $2, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add 
$16, %r10 @@ -505,18 +565,18 @@ LABEL(loop_ashr_2_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $2, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_2_use_sse4_2) @@ -526,10 +586,10 @@ LABEL(nibble_ashr_2_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $2, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $13, %ecx ja LABEL(loop_ashr_2_use_sse4_2) @@ -578,18 +638,18 @@ LABEL(loop_ashr_3_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $3, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -597,18 +657,18 @@ LABEL(loop_ashr_3_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $3, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) 
-#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_3_use_sse4_2) @@ -618,10 +678,10 @@ LABEL(nibble_ashr_3_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $3, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $12, %ecx ja LABEL(loop_ashr_3_use_sse4_2) @@ -671,18 +731,18 @@ LABEL(loop_ashr_4_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $4, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -690,18 +750,18 @@ LABEL(loop_ashr_4_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $4, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_4_use_sse4_2) @@ -711,10 +771,10 @@ LABEL(nibble_ashr_4_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $4, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $11, %ecx ja LABEL(loop_ashr_4_use_sse4_2) @@ -764,18 +824,18 @@ LABEL(loop_ashr_5_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $5, -16(%rdi, %rdx), %xmm0 -#ifndef 
USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -784,18 +844,18 @@ LABEL(loop_ashr_5_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $5, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_5_use_sse4_2) @@ -805,10 +865,10 @@ LABEL(nibble_ashr_5_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $5, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $10, %ecx ja LABEL(loop_ashr_5_use_sse4_2) @@ -858,18 +918,18 @@ LABEL(loop_ashr_6_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $6, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -877,18 +937,18 @@ LABEL(loop_ashr_6_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $6, 
-16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_6_use_sse4_2) @@ -898,10 +958,10 @@ LABEL(nibble_ashr_6_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $6, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $9, %ecx ja LABEL(loop_ashr_6_use_sse4_2) @@ -951,18 +1011,18 @@ LABEL(loop_ashr_7_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $7, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -970,18 +1030,18 @@ LABEL(loop_ashr_7_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $7, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a,(%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_7_use_sse4_2) @@ -991,10 +1051,10 @@ 
LABEL(nibble_ashr_7_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $7, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $8, %ecx ja LABEL(loop_ashr_7_use_sse4_2) @@ -1044,18 +1104,18 @@ LABEL(loop_ashr_8_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $8, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -1063,18 +1123,18 @@ LABEL(loop_ashr_8_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $8, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_8_use_sse4_2) @@ -1084,10 +1144,10 @@ LABEL(nibble_ashr_8_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $8, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $7, %ecx ja LABEL(loop_ashr_8_use_sse4_2) @@ -1138,18 +1198,18 @@ LABEL(loop_ashr_9_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $9, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined 
USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -1157,18 +1217,18 @@ LABEL(loop_ashr_9_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $9, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_9_use_sse4_2) @@ -1178,10 +1238,10 @@ LABEL(nibble_ashr_9_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $9, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $6, %ecx ja LABEL(loop_ashr_9_use_sse4_2) @@ -1231,18 +1291,18 @@ LABEL(loop_ashr_10_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $10, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -1250,18 +1310,18 @@ LABEL(loop_ashr_10_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $10, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# 
if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_10_use_sse4_2) @@ -1271,10 +1331,10 @@ LABEL(nibble_ashr_10_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $10, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $5, %ecx ja LABEL(loop_ashr_10_use_sse4_2) @@ -1324,18 +1384,18 @@ LABEL(loop_ashr_11_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $11, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -1343,18 +1403,18 @@ LABEL(loop_ashr_11_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $11, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_11_use_sse4_2) @@ -1364,10 +1424,10 @@ LABEL(nibble_ashr_11_use_sse4_2): movdqa -16(%rdi, 
%rdx), %xmm0 psrldq $11, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $4, %ecx ja LABEL(loop_ashr_11_use_sse4_2) @@ -1417,18 +1477,18 @@ LABEL(loop_ashr_12_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $12, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -1436,18 +1496,18 @@ LABEL(loop_ashr_12_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $12, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_12_use_sse4_2) @@ -1457,10 +1517,10 @@ LABEL(nibble_ashr_12_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $12, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $3, %ecx ja LABEL(loop_ashr_12_use_sse4_2) @@ -1511,18 +1571,18 @@ LABEL(loop_ashr_13_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $13, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 
-#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -1530,18 +1590,18 @@ LABEL(loop_ashr_13_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $13, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_13_use_sse4_2) @@ -1551,10 +1611,10 @@ LABEL(nibble_ashr_13_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $13, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $2, %ecx ja LABEL(loop_ashr_13_use_sse4_2) @@ -1605,18 +1665,18 @@ LABEL(loop_ashr_14_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $14, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -1624,18 +1684,18 @@ LABEL(loop_ashr_14_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $14, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined 
USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_14_use_sse4_2) @@ -1645,10 +1705,10 @@ LABEL(nibble_ashr_14_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $14, %xmm0 pcmpistri $0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $1, %ecx ja LABEL(loop_ashr_14_use_sse4_2) @@ -1701,18 +1761,18 @@ LABEL(loop_ashr_15_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $15, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx add $16, %r10 @@ -1720,18 +1780,18 @@ LABEL(loop_ashr_15_use_sse4_2): movdqa (%rdi, %rdx), %xmm0 palignr $15, -16(%rdi, %rdx), %xmm0 -#ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpistri $0x1a, (%rsi,%rdx), %xmm0 -#else +# else movdqa (%rsi,%rdx), %xmm1 TOLOWER (%xmm0, %xmm1) pcmpistri $0x1a, %xmm1, %xmm0 -#endif +# endif jbe LABEL(use_sse4_2_exit) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add $16, %rdx jmp LABEL(loop_ashr_15_use_sse4_2) @@ -1741,10 +1801,10 @@ LABEL(nibble_ashr_15_use_sse4_2): movdqa -16(%rdi, %rdx), %xmm0 psrldq $15, %xmm0 pcmpistri 
$0x3a,%xmm0, %xmm0 -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp %r11, %rcx jae LABEL(nibble_ashr_use_sse4_2_exit) -#endif +# endif cmp $0, %ecx ja LABEL(loop_ashr_15_use_sse4_2) @@ -1753,10 +1813,10 @@ LABEL(nibble_ashr_use_sse4_2_exit): .p2align 4 LABEL(use_sse4_2_exit): jnc LABEL(strcmp_exitz_sse4_2) -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub %rcx, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif add %rcx, %rdx lea -16(%rdi, %r9), %rdi movzbl (%rdi, %rdx), %eax @@ -1765,7 +1825,7 @@ LABEL(use_sse4_2_exit): jz LABEL(use_sse4_2_ret_sse4_2) xchg %eax, %edx LABEL(use_sse4_2_ret_sse4_2): -# ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx movl (%rcx,%rdx,4), %edx movl (%rcx,%rax,4), %eax @@ -1786,14 +1846,14 @@ LABEL(ret_sse4_2): LABEL(less16bytes_sse4_2): bsf %rdx, %rdx /* find and store bit index in %rdx */ -#ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub %rdx, %r11 jbe LABEL(strcmp_exitz_sse4_2) -#endif +# endif movzbl (%rsi, %rdx), %ecx movzbl (%rdi, %rdx), %eax -# ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx movl (%rdx,%rcx,4), %ecx movl (%rdx,%rax,4), %eax @@ -1812,7 +1872,7 @@ LABEL(Byte0_sse4_2): movzx (%rsi), %ecx movzx (%rdi), %eax -# ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx movl (%rdx,%rcx,4), %ecx movl (%rdx,%rax,4), %eax @@ -1870,6 +1930,16 @@ LABEL(unaligned_table_sse4_2): cfi_endproc; .size __strcasecmp_sse2, .-__strcasecmp_sse2 # endif +# ifdef USE_AS_STRNCASECMP_L +# define ENTRY2(name) \ + .type __strncasecmp_sse2, @function; \ + .align 16; \ + __strncasecmp_sse2: cfi_startproc; \ + CALL_MCOUNT +# define END2(name) \ + cfi_endproc; .size 
__strncasecmp_sse2, .-__strncasecmp_sse2 +# endif + # undef libc_hidden_builtin_def /* It doesn't make sense to send libc-internal strcmp calls through a PLT. The speedup we get from using SSE4.2 instruction is likely eaten away diff --git a/sysdeps/x86_64/multiarch/strncase_l-ssse3.S b/sysdeps/x86_64/multiarch/strncase_l-ssse3.S new file mode 100644 index 0000000..6728678 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncase_l-ssse3.S @@ -0,0 +1,6 @@ +#define USE_SSSE3 1 +#define USE_AS_STRNCASECMP_L +#define NO_NOLOCALE_ALIAS +#define STRCMP __strncasecmp_l_ssse3 +#define __strncasecmp __strncasecmp_ssse3 +#include "../strcmp.S" diff --git a/sysdeps/x86_64/multiarch/strncase_l.S b/sysdeps/x86_64/multiarch/strncase_l.S new file mode 100644 index 0000000..c725cd8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncase_l.S @@ -0,0 +1,6 @@ +#define STRCMP __strncasecmp_l +#define USE_AS_STRNCASECMP_L +#include "strcmp.S" + +weak_alias (__strncasecmp_l, strncasecmp_l) +libc_hidden_def (strncasecmp_l) diff --git a/sysdeps/x86_64/strcasecmp_l-nonascii.c b/sysdeps/x86_64/strcasecmp_l-nonascii.c index 7a0a04f..13b35de 100644 --- a/sysdeps/x86_64/strcasecmp_l-nonascii.c +++ b/sysdeps/x86_64/strcasecmp_l-nonascii.c @@ -1,5 +1,8 @@ #include <string.h> + +extern int __strcasecmp_l_nonascii (__const char *__s1, __const char *__s2, + __locale_t __loc); + #define __strcasecmp_l __strcasecmp_l_nonascii #define USE_IN_EXTENDED_LOCALE_MODEL 1 #include <string/strcasecmp.c> diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S index 1b48f04..5a4346b 100644 --- a/sysdeps/x86_64/strcmp.S +++ b/sysdeps/x86_64/strcmp.S @@ -60,10 +60,26 @@ # endif # define UPDATE_STRNCMP_COUNTER +#elif defined USE_AS_STRNCASECMP_L +# include "locale-defines.h" + +/* No support for strncasecmp outside libc so far since it is not needed. 
*/ +# ifdef NOT_IN_libc +# error "strncasecmp_l not implemented so far" +# endif + +# define UPDATE_STRNCMP_COUNTER \ + /* calculate left number to compare */ \ + lea -16(%rcx, %r11), %r9; \ + cmp %r9, %r11; \ + jb LABEL(strcmp_exitz); \ + test %r9, %r9; \ + je LABEL(strcmp_exitz); \ + mov %r9, %r11 #else # define UPDATE_STRNCMP_COUNTER # ifndef STRCMP # define STRCMP strcmp # endif #endif @@ -79,7 +95,7 @@ # define END2(name) END (name) # endif - ENTRY2 (__strcasecmp) +ENTRY2 (__strcasecmp) movq __libc_tsd_LOCALE@gottpoff(%rip),%rax movq %fs:(%rax),%rdx @@ -92,6 +108,25 @@ weak_alias (__strcasecmp, strcasecmp) libc_hidden_def (__strcasecmp) # endif /* FALLTHROUGH to strcasecmp_l. */ +#elif defined USE_AS_STRNCASECMP_L +# ifndef ENTRY2 +# define ENTRY2(name) ENTRY (name) +# define END2(name) END (name) +# endif + +ENTRY2 (__strncasecmp) + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax + movq %fs:(%rax),%rcx /* locale is the 4th argument of strncasecmp_l */ + + // XXX 5 byte should be before the function + /* 5-byte NOP. */ + .byte 0x0f,0x1f,0x44,0x00,0x00 +END2 (__strncasecmp) +# ifndef NO_NOLOCALE_ALIAS +weak_alias (__strncasecmp, strncasecmp) +libc_hidden_def (__strncasecmp) +# endif + /* FALLTHROUGH to strncasecmp_l. */ #endif ENTRY (BP_SYM (STRCMP)) @@ -124,12 +159,22 @@ END (BP_SYM (STRCMP)) # endif testl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) jne __strcasecmp_l_nonascii +# elif defined USE_AS_STRNCASECMP_L + /* We have to fall back on the C implementation for locales + with encodings not matching ASCII for single bytes. */ +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rcx), %rax +# else + movq (%rcx), %rax +# endif + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) + jne __strncasecmp_l_nonascii # endif /* * This implementation uses SSE to compare up to 16 bytes at a time. 
*/ -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L test %rdx, %rdx je LABEL(strcmp_exitz) cmp $1, %rdx @@ -141,7 +186,7 @@ END (BP_SYM (STRCMP)) /* Use 64bit AND here to avoid long NOP padding. */ and $0x3f, %rcx /* rsi alignment in cache line */ and $0x3f, %rax /* rdi alignment in cache line */ -# ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L .section .rodata.cst16,"aM",@progbits,16 .align 16 .Lbelowupper: @@ -155,11 +200,11 @@ END (BP_SYM (STRCMP)) .quad 0x2020202020202020 .previous movdqa .Lbelowupper(%rip), %xmm5 -# define UCLOW_reg %xmm5 +# define UCLOW_reg %xmm5 movdqa .Ltopupper(%rip), %xmm6 -# define UCHIGH_reg %xmm6 +# define UCHIGH_reg %xmm6 movdqa .Ltouppermask(%rip), %xmm7 -# define LCQWORD_reg %xmm7 +# define LCQWORD_reg %xmm7 # endif cmp $0x30, %ecx ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */ @@ -169,7 +214,7 @@ END (BP_SYM (STRCMP)) movlpd (%rsi), %xmm2 movhpd 8(%rdi), %xmm1 movhpd 8(%rsi), %xmm2 -# ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L # define TOLOWER(reg1, reg2) \ movdqa reg1, %xmm8; \ movdqa UCHIGH_reg, %xmm9; \ @@ -196,7 +241,7 @@ END (BP_SYM (STRCMP)) pmovmskb %xmm1, %edx sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ jnz LABEL(less16bytes) /* If not, find different value or null char */ -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) /* finish comparision */ # endif @@ -241,7 +286,7 @@ LABEL(ashr_0): movdqa (%rsi), %xmm1 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ pcmpeqb %xmm1, %xmm0 /* Any null chars? 
*/ -# ifndef USE_AS_STRCASECMP_L +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ # else movdqa (%rdi), %xmm2 @@ -280,7 +325,7 @@ LABEL(loop_ashr_0): sub $0xffff, %edx jnz LABEL(exit) /* mismatch or null char seen */ -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -295,7 +340,7 @@ LABEL(loop_ashr_0): pmovmskb %xmm1, %edx sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -363,7 +408,7 @@ LABEL(gobble_ashr_1): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -393,7 +438,7 @@ LABEL(gobble_ashr_1): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -412,7 +457,7 @@ LABEL(nibble_ashr_1): test $0xfffe, %edx jnz LABEL(ashr_1_exittail) /* find null char*/ -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $14, %r11 jbe LABEL(ashr_1_exittail) # endif @@ -493,7 +538,7 @@ LABEL(gobble_ashr_2): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -524,7 +569,7 @@ LABEL(gobble_ashr_2): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -540,7 +585,7 @@ LABEL(nibble_ashr_2): test $0xfffc, %edx jnz LABEL(ashr_2_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $13, %r11 jbe LABEL(ashr_2_exittail) # endif @@ -618,7 +663,7 @@ LABEL(gobble_ashr_3): sub $0xffff, 
%edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -649,7 +694,7 @@ LABEL(gobble_ashr_3): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -665,7 +710,7 @@ LABEL(nibble_ashr_3): test $0xfff8, %edx jnz LABEL(ashr_3_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $12, %r11 jbe LABEL(ashr_3_exittail) # endif @@ -743,7 +788,7 @@ LABEL(gobble_ashr_4): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -774,7 +819,7 @@ LABEL(gobble_ashr_4): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -790,7 +835,7 @@ LABEL(nibble_ashr_4): test $0xfff0, %edx jnz LABEL(ashr_4_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $11, %r11 jbe LABEL(ashr_4_exittail) # endif @@ -868,7 +913,7 @@ LABEL(gobble_ashr_5): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -899,7 +944,7 @@ LABEL(gobble_ashr_5): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -915,7 +960,7 @@ LABEL(nibble_ashr_5): test $0xffe0, %edx jnz LABEL(ashr_5_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $10, %r11 jbe LABEL(ashr_5_exittail) # endif @@ -993,7 +1038,7 @@ LABEL(gobble_ashr_6): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined 
USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1024,7 +1069,7 @@ LABEL(gobble_ashr_6): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1040,7 +1085,7 @@ LABEL(nibble_ashr_6): test $0xffc0, %edx jnz LABEL(ashr_6_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $9, %r11 jbe LABEL(ashr_6_exittail) # endif @@ -1118,7 +1163,7 @@ LABEL(gobble_ashr_7): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1149,7 +1194,7 @@ LABEL(gobble_ashr_7): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1165,7 +1210,7 @@ LABEL(nibble_ashr_7): test $0xff80, %edx jnz LABEL(ashr_7_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $8, %r11 jbe LABEL(ashr_7_exittail) # endif @@ -1243,7 +1288,7 @@ LABEL(gobble_ashr_8): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1274,7 +1319,7 @@ LABEL(gobble_ashr_8): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1290,7 +1335,7 @@ LABEL(nibble_ashr_8): test $0xff00, %edx jnz LABEL(ashr_8_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $7, %r11 jbe LABEL(ashr_8_exittail) # endif @@ -1368,7 +1413,7 @@ LABEL(gobble_ashr_9): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ 
-1399,7 +1444,7 @@ LABEL(gobble_ashr_9): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1415,7 +1460,7 @@ LABEL(nibble_ashr_9): test $0xfe00, %edx jnz LABEL(ashr_9_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $6, %r11 jbe LABEL(ashr_9_exittail) # endif @@ -1493,7 +1538,7 @@ LABEL(gobble_ashr_10): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1524,7 +1569,7 @@ LABEL(gobble_ashr_10): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1540,7 +1585,7 @@ LABEL(nibble_ashr_10): test $0xfc00, %edx jnz LABEL(ashr_10_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $5, %r11 jbe LABEL(ashr_10_exittail) # endif @@ -1618,7 +1663,7 @@ LABEL(gobble_ashr_11): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1649,7 +1694,7 @@ LABEL(gobble_ashr_11): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1665,7 +1710,7 @@ LABEL(nibble_ashr_11): test $0xf800, %edx jnz LABEL(ashr_11_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $4, %r11 jbe LABEL(ashr_11_exittail) # endif @@ -1743,7 +1788,7 @@ LABEL(gobble_ashr_12): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1774,7 +1819,7 @@ LABEL(gobble_ashr_12): sub $0xffff, %edx jnz 
LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1790,7 +1835,7 @@ LABEL(nibble_ashr_12): test $0xf000, %edx jnz LABEL(ashr_12_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $3, %r11 jbe LABEL(ashr_12_exittail) # endif @@ -1868,7 +1913,7 @@ LABEL(gobble_ashr_13): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1899,7 +1944,7 @@ LABEL(gobble_ashr_13): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -1915,7 +1960,7 @@ LABEL(nibble_ashr_13): test $0xe000, %edx jnz LABEL(ashr_13_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $2, %r11 jbe LABEL(ashr_13_exittail) # endif @@ -1993,7 +2038,7 @@ LABEL(gobble_ashr_14): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -2024,7 +2069,7 @@ LABEL(gobble_ashr_14): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -2040,7 +2085,7 @@ LABEL(nibble_ashr_14): test $0xc000, %edx jnz LABEL(ashr_14_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $1, %r11 jbe LABEL(ashr_14_exittail) # endif @@ -2120,7 +2165,7 @@ LABEL(gobble_ashr_15): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -2151,7 +2196,7 @@ LABEL(gobble_ashr_15): sub $0xffff, %edx jnz LABEL(exit) -# ifdef USE_AS_STRNCMP +# if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 jbe LABEL(strcmp_exitz) # endif @@ -2167,7 +2212,7 @@ LABEL(nibble_ashr_15): test $0x8000, %edx jnz LABEL(ashr_15_exittail) -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L test %r11, %r11 je LABEL(ashr_15_exittail) # endif @@ -2205,14 +2250,14 @@ LABEL(ret): LABEL(less16bytes): bsf %rdx, %rdx /* find and store bit index in %rdx */ -# ifdef USE_AS_STRNCMP +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub %rdx, %r11 jbe LABEL(strcmp_exitz) # endif movzbl (%rsi, %rdx), %ecx movzbl (%rdi, %rdx), %eax -# ifdef USE_AS_STRCASECMP_L +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx movl (%rdx,%rcx,4), %ecx movl (%rdx,%rax,4), %eax @@ -2230,6 +2275,12 @@ LABEL(Byte0): movzx (%rsi), %ecx movzx (%rdi), %eax +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx + movl (%rdx,%rcx,4), %ecx + movl (%rdx,%rax,4), %eax +# endif + sub %ecx, %eax ret END (BP_SYM (STRCMP)) diff --git a/sysdeps/x86_64/strncase.S b/sysdeps/x86_64/strncase.S new file mode 100644 index 0000000..2de2ce4 --- /dev/null +++ b/sysdeps/x86_64/strncase.S @@ -0,0 +1 @@ +/* In strncase_l.S. 
*/ diff --git a/sysdeps/x86_64/strncase_l-nonascii.c b/sysdeps/x86_64/strncase_l-nonascii.c new file mode 100644 index 0000000..baabd31 --- /dev/null +++ b/sysdeps/x86_64/strncase_l-nonascii.c @@ -0,0 +1,8 @@ +#include <string.h> + +extern int __strncasecmp_l_nonascii (__const char *__s1, __const char *__s2, + size_t __n, __locale_t __loc); + +#define __strncasecmp_l __strncasecmp_l_nonascii +#define USE_IN_EXTENDED_LOCALE_MODEL 1 +#include <string/strncase.c> diff --git a/sysdeps/x86_64/strncase_l.S b/sysdeps/x86_64/strncase_l.S new file mode 100644 index 0000000..c725cd8 --- /dev/null +++ b/sysdeps/x86_64/strncase_l.S @@ -0,0 +1,6 @@ +#define STRCMP __strncasecmp_l +#define USE_AS_STRNCASECMP_L +#include "strcmp.S" + +weak_alias (__strncasecmp_l, strncasecmp_l) +libc_hidden_def (strncasecmp_l) -- 2.7.4