stpcpy-sse2-unaligned \
stpncpy-avx2 \
stpncpy-avx2-rtm \
- stpncpy-c \
+ stpncpy-generic \
stpncpy-evex \
stpncpy-sse2-unaligned \
strcasecmp_l-avx2 \
strcpy-evex \
strcpy-sse2 \
strcpy-sse2-unaligned \
- strcspn-c \
- strcspn-sse2 \
+ strcspn-generic \
+ strcspn-sse4 \
strlen-avx2 \
strlen-avx2-rtm \
strlen-evex \
strncase_l-sse4_2 \
strncat-avx2 \
strncat-avx2-rtm \
- strncat-c \
+ strncat-generic \
strncat-evex \
strncat-sse2-unaligned \
strncmp-avx2 \
strncmp-sse4_2 \
strncpy-avx2 \
strncpy-avx2-rtm \
- strncpy-c \
+ strncpy-generic \
strncpy-evex \
strncpy-sse2-unaligned \
strnlen-avx2 \
strnlen-evex \
strnlen-evex512 \
strnlen-sse2 \
- strpbrk-c \
- strpbrk-sse2 \
+ strpbrk-generic \
+ strpbrk-sse4 \
strrchr-avx2 \
strrchr-avx2-rtm \
strrchr-evex \
strrchr-sse2 \
- strspn-c \
- strspn-sse2 \
+ strspn-generic \
+ strspn-sse4 \
strstr-avx512 \
strstr-sse2-unaligned \
varshift \
# sysdep_routines
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
+
+CFLAGS-strcspn-sse4.c += -msse4
+CFLAGS-strpbrk-sse4.c += -msse4
+CFLAGS-strspn-sse4.c += -msse4
+
CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
endif
wcscmp-avx2-rtm \
wcscmp-evex \
wcscmp-sse2 \
- wcscpy-c \
+ wcscpy-generic \
wcscpy-ssse3 \
wcslen-avx2 \
wcslen-avx2-rtm \
wcslen-sse4_1 \
wcsncmp-avx2 \
wcsncmp-avx2-rtm \
+ wcsncmp-generic \
wcsncmp-evex \
- wcsncmp-sse2 \
wcsnlen-avx2 \
wcsnlen-avx2-rtm \
- wcsnlen-c \
+ wcsnlen-generic \
wcsnlen-evex \
wcsnlen-evex512 \
wcsnlen-sse4_1 \
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
return OPTIMIZE (avx2);
}
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
__stpncpy_evex)
IFUNC_IMPL_ADD (array, i, stpncpy, 1,
__stpncpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_generic))
/* Support sysdeps/x86_64/multiarch/stpcpy.c. */
IFUNC_IMPL (i, name, stpcpy,
IFUNC_IMPL (i, name, strcspn,
IFUNC_IMPL_ADD (array, i, strcspn, CPU_FEATURE_USABLE (SSE4_2),
__strcspn_sse42)
- IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
+ IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_generic))
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp,
__strncat_evex)
IFUNC_IMPL_ADD (array, i, strncat, 1,
__strncat_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
+ IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_generic))
/* Support sysdeps/x86_64/multiarch/strncpy.c. */
IFUNC_IMPL (i, name, strncpy,
__strncpy_evex)
IFUNC_IMPL_ADD (array, i, strncpy, 1,
__strncpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
+ IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_generic))
/* Support sysdeps/x86_64/multiarch/strpbrk.c. */
IFUNC_IMPL (i, name, strpbrk,
IFUNC_IMPL_ADD (array, i, strpbrk, CPU_FEATURE_USABLE (SSE4_2),
__strpbrk_sse42)
- IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
+ IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_generic))
/* Support sysdeps/x86_64/multiarch/strspn.c. */
IFUNC_IMPL (i, name, strspn,
IFUNC_IMPL_ADD (array, i, strspn, CPU_FEATURE_USABLE (SSE4_2),
__strspn_sse42)
- IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_sse2))
+ IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_generic))
/* Support sysdeps/x86_64/multiarch/strstr.c. */
IFUNC_IMPL (i, name, strstr,
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_evex)
- IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
+ IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_generic))
/* Support sysdeps/x86_64/multiarch/wcscpy.c. */
IFUNC_IMPL (i, name, wcscpy,
IFUNC_IMPL_ADD (array, i, wcscpy, CPU_FEATURE_USABLE (SSSE3),
__wcscpy_ssse3)
- IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
+ IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_generic))
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
IFUNC_IMPL (i, name, wcslen,
IFUNC_IMPL_ADD (array, i, wcsnlen,
CPU_FEATURE_USABLE (SSE4_1),
__wcsnlen_sse4_1)
- IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+ IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_generic))
/* Support sysdeps/x86_64/multiarch/wmemchr.c. */
IFUNC_IMPL (i, name, wmemchr,
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
static inline void *
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2))
return OPTIMIZE (sse42);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (generic);
}
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
return OPTIMIZE (sse2_unaligned);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+#ifndef GENERIC
+# define GENERIC sse2
+#endif
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (GENERIC) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
return OPTIMIZE (sse4_1);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (GENERIC);
}
+++ /dev/null
-#define STPNCPY __stpncpy_sse2
-#undef weak_alias
-#define weak_alias(ignored1, ignored2)
-#undef libc_hidden_def
-#define libc_hidden_def(stpncpy)
-
-#include <string/stpncpy.c>
--- /dev/null
+/* stpncpy.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#define STPNCPY __stpncpy_generic
+#undef weak_alias
+#define weak_alias(ignored1, ignored2)
+#undef libc_hidden_def
+#define libc_hidden_def(stpncpy)
+
+#include <string/stpncpy.c>
# undef stpncpy
# undef __stpncpy
+# define GENERIC generic
# define SYMBOL_NAME stpncpy
# include "ifunc-strcpy.h"
+++ /dev/null
-/* strcspn with SSE4.2 intrinsics
- Copyright (C) 2009-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <nmmintrin.h>
-#include <string.h>
-#include "varshift.h"
-
-/* We use 0x2:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_ANY
- | _SIDD_POSITIVE_POLARITY
- | _SIDD_LEAST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
-
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- A A A A A A A A A A A A A A A A
-
- to find out if the first 16byte data element has any byte A and
- the offset of the first byte. There are 3 cases:
-
- 1. The first 16byte data element has the byte A at the offset X.
- 2. The first 16byte data element has EOS and doesn't have the byte A.
- 3. The first 16byte data element is valid and doesn't have the byte A.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
-
- 1 X 1 0/1 0
- 2 16 0 1 0
- 3 16 0 0 0
-
- We exit from the loop for cases 1 and 2 with jbe which branches
- when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
- X for case 1. */
-
-#ifndef STRCSPN_SSE2
-# define STRCSPN_SSE2 __strcspn_sse2
-# define STRCSPN_SSE42 __strcspn_sse42
-#endif
-
-#ifdef USE_AS_STRPBRK
-# define RETURN(val1, val2) return val1
-#else
-# define RETURN(val1, val2) return val2
-#endif
-
-extern
-#ifdef USE_AS_STRPBRK
-char *
-#else
-size_t
-#endif
-STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
-
-
-#ifdef USE_AS_STRPBRK
-char *
-#else
-size_t
-#endif
-__attribute__ ((section (".text.sse4.2")))
-STRCSPN_SSE42 (const char *s, const char *a)
-{
- if (*a == 0)
- RETURN (NULL, strlen (s));
-
- const char *aligned;
- __m128i mask, maskz, zero;
- unsigned int maskz_bits;
- unsigned int offset = (unsigned int) ((size_t) a & 15);
- zero = _mm_set1_epi8 (0);
- if (offset != 0)
- {
- /* Load masks. */
- aligned = (const char *) ((size_t) a & -16L);
- __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
- maskz = _mm_cmpeq_epi8 (mask0, zero);
-
- /* Find where the NULL terminator is. */
- maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
- if (maskz_bits != 0)
- {
- mask = __m128i_shift_right (mask0, offset);
- offset = (unsigned int) ((size_t) s & 15);
- if (offset)
- goto start_unaligned;
-
- aligned = s;
- goto start_loop;
- }
- }
-
- /* A is aligned. */
- mask = _mm_loadu_si128 ((__m128i *) a);
- /* Find where the NULL terminator is. */
- maskz = _mm_cmpeq_epi8 (mask, zero);
- maskz_bits = _mm_movemask_epi8 (maskz);
- if (maskz_bits == 0)
- {
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
- if (a[16] != 0)
- return STRCSPN_SSE2 (s, a);
- }
-
- aligned = s;
- offset = (unsigned int) ((size_t) s & 15);
- if (offset != 0)
- {
- start_unaligned:
- /* Check partial string. */
- aligned = (const char *) ((size_t) s & -16L);
- __m128i value = _mm_load_si128 ((__m128i *) aligned);
-
- value = __m128i_shift_right (value, offset);
-
- unsigned int length = _mm_cmpistri (mask, value, 0x2);
- /* No need to check ZFlag since ZFlag is always 1. */
- unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
- if (cflag)
- RETURN ((char *) (s + length), length);
- /* Find where the NULL terminator is. */
- unsigned int index = _mm_cmpistri (value, value, 0x3a);
- if (index < 16 - offset)
- RETURN (NULL, index);
- aligned += 16;
- }
-
-start_loop:
- while (1)
- {
- __m128i value = _mm_load_si128 ((__m128i *) aligned);
- unsigned int index = _mm_cmpistri (mask, value, 0x2);
- unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
- unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
- if (cflag)
- RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
- if (zflag)
- RETURN (NULL,
- /* Find where the NULL terminator is. */
- (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
- aligned += 16;
- }
-}
--- /dev/null
+/* strcspn.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define STRCSPN __strcspn_generic
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRCSPN)
+#endif
+
+#include <string/strcspn.c>
+++ /dev/null
-/* strcspn.
- Copyright (C) 2017-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRCSPN __strcspn_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRCSPN)
-#endif
-
-#include <string/strcspn.c>
--- /dev/null
+/* strcspn with SSE4.2 intrinsics
+ Copyright (C) 2009-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <nmmintrin.h>
+#include <string.h>
+#include "varshift.h"
+
+/* We use 0x2:
+ _SIDD_SBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_POSITIVE_POLARITY
+ | _SIDD_LEAST_SIGNIFICANT
+ on pcmpistri to compare xmm/mem128
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ X X X X X X X X X X X X X X X X
+
+ against xmm
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ A A A A A A A A A A A A A A A A
+
+ to find out if the first 16byte data element has any byte A and
+ the offset of the first byte. There are 3 cases:
+
+ 1. The first 16byte data element has the byte A at the offset X.
+ 2. The first 16byte data element has EOS and doesn't have the byte A.
+ 3. The first 16byte data element is valid and doesn't have the byte A.
+
+ Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
+
+ 1 X 1 0/1 0
+ 2 16 0 1 0
+ 3 16 0 0 0
+
+ We exit from the loop for cases 1 and 2 with jbe which branches
+ when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
+ X for case 1. */
+
+#ifndef STRCSPN_GENERIC
+# define STRCSPN_GENERIC __strcspn_generic
+# define STRCSPN_SSE42 __strcspn_sse42
+#endif
+
+#ifdef USE_AS_STRPBRK
+# define RETURN(val1, val2) return val1
+#else
+# define RETURN(val1, val2) return val2
+#endif
+
+extern
+#ifdef USE_AS_STRPBRK
+char *
+#else
+size_t
+#endif
+STRCSPN_GENERIC (const char *, const char *) attribute_hidden;
+
+
+#ifdef USE_AS_STRPBRK
+char *
+#else
+size_t
+#endif
+__attribute__ ((section (".text.sse4.2")))
+STRCSPN_SSE42 (const char *s, const char *a)
+{
+ if (*a == 0)
+ RETURN (NULL, strlen (s));
+
+ const char *aligned;
+ __m128i mask, maskz, zero;
+ unsigned int maskz_bits;
+ unsigned int offset = (unsigned int) ((size_t) a & 15);
+ zero = _mm_set1_epi8 (0);
+ if (offset != 0)
+ {
+ /* Load masks. */
+ aligned = (const char *) ((size_t) a & -16L);
+ __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
+ maskz = _mm_cmpeq_epi8 (mask0, zero);
+
+ /* Find where the NULL terminator is. */
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
+ {
+ mask = __m128i_shift_right (mask0, offset);
+ offset = (unsigned int) ((size_t) s & 15);
+ if (offset)
+ goto start_unaligned;
+
+ aligned = s;
+ goto start_loop;
+ }
+ }
+
+ /* A is aligned. */
+ mask = _mm_loadu_si128 ((__m128i *) a);
+ /* Find where the NULL terminator is. */
+ maskz = _mm_cmpeq_epi8 (mask, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz);
+ if (maskz_bits == 0)
+ {
+ /* There is no NULL terminator. Don't use SSE4.2 if the length
+ of A > 16. */
+ if (a[16] != 0)
+ return STRCSPN_GENERIC (s, a);
+ }
+
+ aligned = s;
+ offset = (unsigned int) ((size_t) s & 15);
+ if (offset != 0)
+ {
+ start_unaligned:
+ /* Check partial string. */
+ aligned = (const char *) ((size_t) s & -16L);
+ __m128i value = _mm_load_si128 ((__m128i *) aligned);
+
+ value = __m128i_shift_right (value, offset);
+
+ unsigned int length = _mm_cmpistri (mask, value, 0x2);
+ /* No need to check ZFlag since ZFlag is always 1. */
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
+ if (cflag)
+ RETURN ((char *) (s + length), length);
+ /* Find where the NULL terminator is. */
+ unsigned int index = _mm_cmpistri (value, value, 0x3a);
+ if (index < 16 - offset)
+ RETURN (NULL, index);
+ aligned += 16;
+ }
+
+start_loop:
+ while (1)
+ {
+ __m128i value = _mm_load_si128 ((__m128i *) aligned);
+ unsigned int index = _mm_cmpistri (mask, value, 0x2);
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
+ unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
+ if (cflag)
+ RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
+ if (zflag)
+ RETURN (NULL,
+ /* Find where the NULL terminator is. */
+ (size_t) (aligned + _mm_cmpistri (value, value, 0x3a) - s));
+ aligned += 16;
+ }
+}
+++ /dev/null
-#define STRNCAT __strncat_sse2
-#include <string/strncat.c>
--- /dev/null
+/* strncat.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#define STRNCAT __strncat_generic
+#include <string/strncat.c>
# undef strncat
# define SYMBOL_NAME strncat
+# define GENERIC generic
# include "ifunc-strcpy.h"
libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
+++ /dev/null
-#define STRNCPY __strncpy_sse2
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(strncpy)
-
-#include <string/strncpy.c>
--- /dev/null
+/* strncpy.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#define STRNCPY __strncpy_generic
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(strncpy)
+
+#include <string/strncpy.c>
# undef strncpy
# define SYMBOL_NAME strncpy
+# define GENERIC generic
# include "ifunc-strcpy.h"
libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
+++ /dev/null
-/* strpbrk with SSE4.2 intrinsics
- Copyright (C) 2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#define USE_AS_STRPBRK
-#define STRCSPN_SSE2 __strpbrk_sse2
-#define STRCSPN_SSE42 __strpbrk_sse42
-#include "strcspn-c.c"
--- /dev/null
+/* strpbrk.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define STRPBRK __strpbrk_generic
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRPBRK)
+#endif
+
+#include <string/strpbrk.c>
+++ /dev/null
-/* strpbrk.
- Copyright (C) 2017-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRPBRK __strpbrk_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRPBRK)
-#endif
-
-#include <string/strpbrk.c>
--- /dev/null
+/* strpbrk with SSE4.2 intrinsics
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define USE_AS_STRPBRK
+#define STRCSPN_GENERIC __strpbrk_generic
+#define STRCSPN_SSE42 __strpbrk_sse42
+#include "strcspn-sse4.c"
+++ /dev/null
-/* strspn with SSE4.2 intrinsics
- Copyright (C) 2009-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <nmmintrin.h>
-#include <string.h>
-#include "varshift.h"
-
-/* We use 0x12:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_ANY
- | _SIDD_NEGATIVE_POLARITY
- | _SIDD_LEAST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
-
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- A A A A A A A A A A A A A A A A
-
- to find out if the first 16byte data element has any non-A byte and
- the offset of the first byte. There are 2 cases:
-
- 1. The first 16byte data element has the non-A byte, including
- EOS, at the offset X.
- 2. The first 16byte data element is valid and doesn't have the non-A
- byte.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
-
- case ECX CFlag ZFlag SFlag
- 1 X 1 0/1 0
- 2 16 0 0 0
-
- We exit from the loop for case 1. */
-
-extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
-
-
-size_t
-__attribute__ ((section (".text.sse4.2")))
-__strspn_sse42 (const char *s, const char *a)
-{
- if (*a == 0)
- return 0;
-
- const char *aligned;
- __m128i mask, maskz, zero;
- unsigned int maskz_bits;
- unsigned int offset = (int) ((size_t) a & 15);
- zero = _mm_set1_epi8 (0);
- if (offset != 0)
- {
- /* Load masks. */
- aligned = (const char *) ((size_t) a & -16L);
- __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
- maskz = _mm_cmpeq_epi8 (mask0, zero);
-
- /* Find where the NULL terminator is. */
- maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
- if (maskz_bits != 0)
- {
- mask = __m128i_shift_right (mask0, offset);
- offset = (unsigned int) ((size_t) s & 15);
- if (offset)
- goto start_unaligned;
-
- aligned = s;
- goto start_loop;
- }
- }
-
- /* A is aligned. */
- mask = _mm_loadu_si128 ((__m128i *) a);
-
- /* Find where the NULL terminator is. */
- maskz = _mm_cmpeq_epi8 (mask, zero);
- maskz_bits = _mm_movemask_epi8 (maskz);
- if (maskz_bits == 0)
- {
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
- if (a[16] != 0)
- return __strspn_sse2 (s, a);
- }
- aligned = s;
- offset = (unsigned int) ((size_t) s & 15);
-
- if (offset != 0)
- {
- start_unaligned:
- /* Check partial string. */
- aligned = (const char *) ((size_t) s & -16L);
- __m128i value = _mm_load_si128 ((__m128i *) aligned);
- __m128i adj_value = __m128i_shift_right (value, offset);
-
- unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
- /* No need to check CFlag since it is always 1. */
- if (length < 16 - offset)
- return length;
- /* Find where the NULL terminator is. */
- maskz = _mm_cmpeq_epi8 (value, zero);
- maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
- if (maskz_bits != 0)
- return length;
- aligned += 16;
- }
-
-start_loop:
- while (1)
- {
- __m128i value = _mm_load_si128 ((__m128i *) aligned);
- unsigned int index = _mm_cmpistri (mask, value, 0x12);
- unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
- if (cflag)
- return (size_t) (aligned + index - s);
- aligned += 16;
- }
-}
--- /dev/null
+/* strspn.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define STRSPN __strspn_generic
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRSPN)
+#endif
+
+#include <string/strspn.c>
+++ /dev/null
-/* strspn.
- Copyright (C) 2017-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define STRSPN __strspn_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(STRSPN)
-#endif
-
-#include <string/strspn.c>
--- /dev/null
+/* strspn with SSE4.2 intrinsics
+ Copyright (C) 2009-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <nmmintrin.h>
+#include <string.h>
+#include "varshift.h"
+
+/* We use 0x12:
+ _SIDD_SBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_NEGATIVE_POLARITY
+ | _SIDD_LEAST_SIGNIFICANT
+ on pcmpistri to compare xmm/mem128
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ X X X X X X X X X X X X X X X X
+
+ against xmm
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ A A A A A A A A A A A A A A A A
+
+ to find out if the first 16byte data element has any non-A byte and
+ the offset of the first byte. There are 2 cases:
+
+ 1. The first 16byte data element has the non-A byte, including
+ EOS, at the offset X.
+ 2. The first 16byte data element is valid and doesn't have the non-A
+ byte.
+
+ Here is the table of ECX, CFlag, ZFlag and SFlag for 2 cases:
+
+ case ECX CFlag ZFlag SFlag
+ 1 X 1 0/1 0
+ 2 16 0 0 0
+
+ We exit from the loop for case 1. */
+
+extern size_t __strspn_generic (const char *, const char *) attribute_hidden;
+
+
+size_t
+__attribute__ ((section (".text.sse4.2")))
+__strspn_sse42 (const char *s, const char *a)
+{
+ if (*a == 0)
+ return 0;
+
+ const char *aligned;
+ __m128i mask, maskz, zero;
+ unsigned int maskz_bits;
+ unsigned int offset = (int) ((size_t) a & 15);
+ zero = _mm_set1_epi8 (0);
+ if (offset != 0)
+ {
+ /* Load masks. */
+ aligned = (const char *) ((size_t) a & -16L);
+ __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
+ maskz = _mm_cmpeq_epi8 (mask0, zero);
+
+ /* Find where the NULL terminator is. */
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
+ {
+ mask = __m128i_shift_right (mask0, offset);
+ offset = (unsigned int) ((size_t) s & 15);
+ if (offset)
+ goto start_unaligned;
+
+ aligned = s;
+ goto start_loop;
+ }
+ }
+
+ /* A is aligned. */
+ mask = _mm_loadu_si128 ((__m128i *) a);
+
+ /* Find where the NULL terminator is. */
+ maskz = _mm_cmpeq_epi8 (mask, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz);
+ if (maskz_bits == 0)
+ {
+ /* There is no NULL terminator. Don't use SSE4.2 if the length
+ of A > 16. */
+ if (a[16] != 0)
+ return __strspn_generic (s, a);
+ }
+ aligned = s;
+ offset = (unsigned int) ((size_t) s & 15);
+
+ if (offset != 0)
+ {
+ start_unaligned:
+ /* Check partial string. */
+ aligned = (const char *) ((size_t) s & -16L);
+ __m128i value = _mm_load_si128 ((__m128i *) aligned);
+ __m128i adj_value = __m128i_shift_right (value, offset);
+
+ unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
+ /* No need to check CFlag since it is always 1. */
+ if (length < 16 - offset)
+ return length;
+ /* Find where the NULL terminator is. */
+ maskz = _mm_cmpeq_epi8 (value, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
+ return length;
+ aligned += 16;
+ }
+
+start_loop:
+ while (1)
+ {
+ __m128i value = _mm_load_si128 ((__m128i *) aligned);
+ unsigned int index = _mm_cmpistri (mask, value, 0x12);
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
+ if (cflag)
+ return (size_t) (aligned + index - s);
+ aligned += 16;
+ }
+}
+++ /dev/null
-#if IS_IN (libc)
-# define WCSCPY __wcscpy_sse2
-#endif
-
-#include <wcsmbs/wcscpy.c>
--- /dev/null
+/* wcscpy.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#if IS_IN (libc)
+# define WCSCPY __wcscpy_generic
+#endif
+
+#include <wcsmbs/wcscpy.c>
# define SYMBOL_NAME wcscpy
# include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
static inline void *
if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
return OPTIMIZE (ssse3);
- return OPTIMIZE (sse2);
+ return OPTIMIZE (generic);
}
libc_ifunc_redirected (__redirect_wcscpy, __wcscpy, IFUNC_SELECTOR ());
--- /dev/null
+/* wcsncmp.
+ Copyright (C) 2018-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define WCSNCMP __wcsncmp_generic
+#include <wcsmbs/wcsncmp.c>
+++ /dev/null
-/* wcsncmp optimized with SSE2.
- Copyright (C) 2018-2022 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#define WCSNCMP __wcsncmp_sse2
-#include <wcsmbs/wcsncmp.c>
# undef wcsncmp
# undef __wcsncmp
+# define GENERIC generic
+
# define SYMBOL_NAME wcsncmp
# include "ifunc-avx2.h"
+++ /dev/null
-#if IS_IN (libc)
-# include <wchar.h>
-
-# define WCSNLEN __wcsnlen_sse2
-
-extern __typeof (wcsnlen) __wcsnlen_sse2;
-#endif
-
-#include "wcsmbs/wcsnlen.c"
--- /dev/null
+/* wcsnlen.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#if IS_IN (libc)
+# include <wchar.h>
+
+# define WCSNLEN __wcsnlen_generic
+
+extern __typeof (wcsnlen) __wcsnlen_generic;
+#endif
+
+#include "wcsmbs/wcsnlen.c"
# undef __wcsnlen
# define SYMBOL_NAME wcsnlen
+# define GENERIC generic
# include "ifunc-wcslen.h"
libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());