on bench-strncpy is 1.9-2.1x faster on average. I tried several variations; using a tail call to memcpy and
calling memset only when the destination needs zero padding gave the best overall results.
2015-08-05 Wilco Dijkstra <wdijkstr@arm.com>
+ * string/strncpy.c (strncpy):
+ Improve performance by using __strnlen/memcpy.
+
+2015-08-05 Wilco Dijkstra <wdijkstr@arm.com>
+
* sysdeps/aarch64/fpu/feenablxcpt.c (feenableexcept):
Optimize to avoid an unnecessary FPCR read.
<http://www.gnu.org/licenses/>. */
#include <string.h>
-#include <memcopy.h>
#undef strncpy
#ifndef STRNCPY
-#define STRNCPY strncpy
+ #define STRNCPY strncpy
#endif
char *
STRNCPY (char *s1, const char *s2, size_t n)
{
- char c;
- char *s = s1;
-
- --s1;
-
- if (n >= 4)
- {
- size_t n4 = n >> 2;
-
- for (;;)
- {
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- break;
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- break;
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- break;
- c = *s2++;
- *++s1 = c;
- if (c == '\0')
- break;
- if (--n4 == 0)
- goto last_chars;
- }
- s1++;
- n = n - (s1 - s);
- memset (s1, '\0', n);
- return s;
- }
-
- last_chars:
- n &= 3;
- if (n == 0)
- return s;
-
- do
- {
- c = *s2++;
- *++s1 = c;
- if (--n == 0)
- return s;
- }
- while (c != '\0');
-
- memset (s1 + 1, '\0', n);
- return s;
+ size_t size = __strnlen (s2, n);
+ if (size != n)
+ memset (s1 + size, '\0', n - size);
+ return memcpy (s1, s2, size);
}
libc_hidden_builtin_def (strncpy)