From 64d76ca0647fc8f2a8709be957a551e9b92f5b1b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 11 May 2012 18:03:58 -0400 Subject: [PATCH] tilegx: small performance fix for string routines We were multiplying a byte by 0x0101010101010101ULL to create a constant for SIMD ops, but the compiler isn't good at optimizing this case (the fact that one operand is a byte is lost by the time it would be possible to do the optimization). So instead we add a helper routine that explicitly uses SIMD ops to create the constant. --- ChangeLog.tile | 11 +++++++++++ sysdeps/tile/tilegx/memchr.c | 4 ++-- sysdeps/tile/tilegx/memset.c | 5 +++-- sysdeps/tile/tilegx/rawmemchr.c | 4 ++-- sysdeps/tile/tilegx/strchr.c | 4 ++-- sysdeps/tile/tilegx/strchrnul.c | 4 ++-- sysdeps/tile/tilegx/string-endian.h | 12 +++++++++++- sysdeps/tile/tilegx/strrchr.c | 4 ++-- 8 files changed, 35 insertions(+), 13 deletions(-) diff --git a/ChangeLog.tile b/ChangeLog.tile index 83136aa..3c192e9 100644 --- a/ChangeLog.tile +++ b/ChangeLog.tile @@ -1,5 +1,16 @@ 2012-05-12 Chris Metcalf + * sysdeps/tile/tilegx/memchr.c: Use new copy_byte() function + to efficiently generate a large constant for masking. + * sysdeps/tile/tilegx/memset.c: Likewise. + * sysdeps/tile/tilegx/rawmemchr.c: Likewise. + * sysdeps/tile/tilegx/strchr.c: Likewise. + * sysdeps/tile/tilegx/strchrnul.c: Likewise. + * sysdeps/tile/tilegx/strrchr.c: Likewise. + * sysdeps/tile/tilegx/string-endian.h (copy_byte): New function. + +2012-05-12 Chris Metcalf + * sysdeps/tile/tilegx/memcpy.c: Allow memcpy(p, p, n) without corrupting memory at "p". diff --git a/sysdeps/tile/tilegx/memchr.c b/sysdeps/tile/tilegx/memchr.c index aea25ff..32f24ec 100644 --- a/sysdeps/tile/tilegx/memchr.c +++ b/sysdeps/tile/tilegx/memchr.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Free Software Foundation, Inc. +/* Copyright (C) 2011-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Chris Metcalf , 2011. 
@@ -41,7 +41,7 @@ __memchr (const void *s, int c, size_t n) p = (const uint64_t *) (s_int & -8); /* Create eight copies of the byte for which we are looking. */ - goal = 0x0101010101010101ULL * (uint8_t) c; + goal = copy_byte(c); /* Read the first word, but munge it so that bytes before the array will not match goal. */ diff --git a/sysdeps/tile/tilegx/memset.c b/sysdeps/tile/tilegx/memset.c index 8083abf..dad3ead 100644 --- a/sysdeps/tile/tilegx/memset.c +++ b/sysdeps/tile/tilegx/memset.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Free Software Foundation, Inc. +/* Copyright (C) 2011-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Chris Metcalf , 2011. @@ -19,6 +19,7 @@ #include #include #include +#include "string-endian.h" void * __memset (void *s, int c, size_t n) @@ -71,7 +72,7 @@ __memset (void *s, int c, size_t n) n64 = n >> 3; /* Tile input byte out to 64 bits. */ - v64 = 0x0101010101010101ULL * (uint8_t) c; + v64 = copy_byte(c); /* This must be at least 8 or the following loop doesn't work. */ #define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8) diff --git a/sysdeps/tile/tilegx/rawmemchr.c b/sysdeps/tile/tilegx/rawmemchr.c index ed9162d..70b5928 100644 --- a/sysdeps/tile/tilegx/rawmemchr.c +++ b/sysdeps/tile/tilegx/rawmemchr.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Free Software Foundation, Inc. +/* Copyright (C) 2011-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Chris Metcalf , 2011. @@ -28,7 +28,7 @@ __rawmemchr (const void *s, int c) const uint64_t *p = (const uint64_t *) (s_int & -8); /* Create eight copies of the byte for which we are looking. */ - const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; + const uint64_t goal = copy_byte(c); /* Read the first word, but munge it so that bytes before the array will not match goal. 
*/ diff --git a/sysdeps/tile/tilegx/strchr.c b/sysdeps/tile/tilegx/strchr.c index 8ef4fdc..c6a741b 100644 --- a/sysdeps/tile/tilegx/strchr.c +++ b/sysdeps/tile/tilegx/strchr.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Free Software Foundation, Inc. +/* Copyright (C) 2011-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Chris Metcalf , 2011. @@ -32,7 +32,7 @@ strchr (const char *s, int c) const uint64_t *p = (const uint64_t *) (s_int & -8); /* Create eight copies of the byte for which we are looking. */ - const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; + const uint64_t goal = copy_byte(c); /* Read the first aligned word, but force bytes before the string to match neither zero nor goal (we make sure the high bit of each byte diff --git a/sysdeps/tile/tilegx/strchrnul.c b/sysdeps/tile/tilegx/strchrnul.c index 1181443..4251598 100644 --- a/sysdeps/tile/tilegx/strchrnul.c +++ b/sysdeps/tile/tilegx/strchrnul.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Free Software Foundation, Inc. +/* Copyright (C) 2011-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Chris Metcalf , 2011. @@ -30,7 +30,7 @@ __strchrnul (const char *s, int c) const uint64_t *p = (const uint64_t *) (s_int & -8); /* Create eight copies of the byte for which we are looking. */ - const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; + const uint64_t goal = copy_byte(c); /* Read the first aligned word, but force bytes before the string to match neither zero nor goal (we make sure the high bit of each byte diff --git a/sysdeps/tile/tilegx/string-endian.h b/sysdeps/tile/tilegx/string-endian.h index 280efd3..c2e40ec 100644 --- a/sysdeps/tile/tilegx/string-endian.h +++ b/sysdeps/tile/tilegx/string-endian.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Free Software Foundation, Inc. +/* Copyright (C) 2011-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Chris Metcalf , 2011. 
@@ -33,3 +33,13 @@ #define CFZ(x) __insn_clz(x) #define REVCZ(x) __insn_ctz(x) #endif + +/* Create eight copies of the byte in a uint64_t. Takes the byte to replicate and returns a 64-bit word built from it by three successive __insn_bfins calls over bit ranges 8..15, 16..31 and 32..63, which are intended to widen the replicated pattern from 8 to 16, 32 and then 64 bits. NOTE(review): this assumes __insn_bfins inserts the low bits of its second operand into the given inclusive bit range of its first operand -- confirm against the TILE-Gx ISA reference. Callers use this in place of multiplying the byte by 0x0101010101010101ULL. */ +static inline uint64_t copy_byte(uint8_t byte) +{ + uint64_t word = byte; + word = __insn_bfins(word, word, 8, 15); + word = __insn_bfins(word, word, 16, 31); + word = __insn_bfins(word, word, 32, 63); + return word; +} diff --git a/sysdeps/tile/tilegx/strrchr.c b/sysdeps/tile/tilegx/strrchr.c index 223c59d..b6e43a6 100644 --- a/sysdeps/tile/tilegx/strrchr.c +++ b/sysdeps/tile/tilegx/strrchr.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Free Software Foundation, Inc. +/* Copyright (C) 2011-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Chris Metcalf , 2011. @@ -28,7 +28,7 @@ strrchr (const char *s, int c) const uint64_t *p = (const uint64_t *) (s_int & -8); /* Create eight copies of the byte for which we are looking. */ - const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; + const uint64_t goal = copy_byte(c); /* Read the first aligned word, but force bytes before the string to match neither zero nor goal (we make sure the high bit of each byte -- 2.7.4