+2017-12-05 Chris Metcalf <cmetcalf@mellanox.com>
+
+ * sysdeps/tile/tilegx/string-endian.h (VECOP): Provide working
+ replacements for __insn_xxx builtins for v1cmpeq, v1cmpltu,
+ v1cmpne, v1add, v1shru, v1shl (register and immediate versions).
+ * sysdeps/tile/tilegx/memchr.c (__memchr): Use VECOP function
+ instead of __insn_xxx.
+ * sysdeps/tile/tilegx/rawmemchr.c (__rawmemchr): Likewise.
+ * sysdeps/tile/tilegx/strstr.c (strcasechr): Likewise.
+ * sysdeps/tile/tilegx/strrchr.c (strrchr): Likewise.
+ * sysdeps/tile/tilegx/strlen.c (strlen): Likewise.
+ * sysdeps/tile/tilegx/strchrnul.c (__strchrnul): Likewise.
+ * sysdeps/tile/tilegx/strchr.c (strchr): Likewise.
+
2017-12-05 Florian Weimer <fweimer@redhat.com>
Linux: Implement interfaces for memory protection keys
/* Compute the address of the word containing the last byte. */
last_word_ptr = (const uint64_t *) ((uintptr_t) last_byte_ptr & -8);
- while ((bits = __insn_v1cmpeq (v, goal)) == 0)
+ while ((bits = v1cmpeq (v, goal)) == 0)
{
if (__builtin_expect (p == last_word_ptr, 0))
{
uint64_t v = (*p | before_mask) ^ (goal & before_mask);
uint64_t bits;
- while ((bits = __insn_v1cmpeq (v, goal)) == 0)
+ while ((bits = v1cmpeq (v, goal)) == 0)
v = *++p;
return ((char *) p) + (CFZ (bits) >> 3);
match neither zero nor goal (we make sure the high bit of each byte
is 1, and the low 7 bits are all the opposite of the goal byte). */
const uint64_t before_mask = MASK (s_int);
- uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+ uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
uint64_t zero_matches, goal_matches;
while (1)
{
/* Look for a terminating '\0'. */
- zero_matches = __insn_v1cmpeqi (v, 0);
+ zero_matches = v1cmpeqi (v, 0);
/* Look for the goal byte. */
- goal_matches = __insn_v1cmpeq (v, goal);
+ goal_matches = v1cmpeq (v, goal);
if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
break;
match neither zero nor goal (we make sure the high bit of each byte
is 1, and the low 7 bits are all the opposite of the goal byte). */
const uint64_t before_mask = MASK (s_int);
- uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+ uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
uint64_t zero_matches, goal_matches;
while (1)
{
/* Look for a terminating '\0'. */
- zero_matches = __insn_v1cmpeqi (v, 0);
+ zero_matches = v1cmpeqi (v, 0);
/* Look for the goal byte. */
- goal_matches = __insn_v1cmpeq (v, goal);
+ goal_matches = v1cmpeq (v, goal);
if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
break;
{
return __insn_shufflebytes(byte, 0, 0);
}
+
+/* Implement the byte vector instructions using extended assembly.
+ The __insn_OP() builtins are buggy in current compiler versions. */
+
+#define VECOP(OP) \
+  static inline uint64_t OP (uint64_t a, uint64_t b) \
+  { \
+    uint64_t result; \
+    asm volatile (#OP " %0, %1, %2" : "=r"(result) : "r"(a), "r"(b)); \
+    return result; \
+  } \
+ \
+  static inline uint64_t OP ## i (uint64_t a, uint64_t b) \
+  { \
+    uint64_t result; \
+    asm volatile (#OP "i %0, %1, %2" : "=r"(result) : "r"(a), "I"(b)); \
+    return result; \
+  }
+
+/* Each VECOP(op) expands to two helpers: op() for the register-operand
+   instruction, and op##i() for the immediate form (the "I" constraint
+   requires the second argument to be a compile-time constant fitting
+   the machine's signed-immediate field; all call sites pass literals).
+   "asm volatile" keeps the compiler from re-deriving these through the
+   broken builtins via CSE or other reassociation.  */
+VECOP(v1cmpeq)
+VECOP(v1cmpltu)
+VECOP(v1cmpne)
+VECOP(v1add)
+VECOP(v1shru)
+VECOP(v1shl)
uint64_t v = *p | MASK (s_int);
uint64_t bits;
- while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
+ while ((bits = v1cmpeqi (v, 0)) == 0)
v = *++p;
return ((const char *) p) + (CFZ (bits) >> 3) - s;
uint64_t v = *p | MASK (s_int);
uint64_t bits;
- while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
+ while ((bits = v1cmpeqi (v, 0)) == 0)
{
if (bytes_read >= maxlen)
{
match neither zero nor goal (we make sure the high bit of each byte
is 1, and the low 7 bits are all the opposite of the goal byte). */
const uint64_t before_mask = MASK (s_int);
- uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+ uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
const char *found = NULL;
uint64_t zero_matches, goal_matches;
while (1)
{
/* Look for a terminating '\0'. */
- zero_matches = __insn_v1cmpeqi (v, 0);
+ zero_matches = v1cmpeqi (v, 0);
/* Look for the goal byte. */
- goal_matches = __insn_v1cmpeq (v, goal);
+ goal_matches = v1cmpeq (v, goal);
/* If we found the goal, record the last offset. */
if (__builtin_expect (goal_matches != 0, 0))
vec_tolower (uint64_t cc)
{
/* For Uppercases letters, add 32 to convert to lower case. */
- uint64_t less_than_eq_Z = __insn_v1cmpltui (cc, 'Z' + 1);
- uint64_t less_than_A = __insn_v1cmpltui (cc, 'A');
- uint64_t is_upper = __insn_v1cmpne (less_than_eq_Z, less_than_A);
- return __insn_v1add (cc,__insn_v1shli (is_upper, 5));
+ uint64_t less_than_eq_Z = v1cmpltui (cc, 'Z' + 1);
+ uint64_t less_than_A = v1cmpltui (cc, 'A');
+ uint64_t is_upper = v1cmpne (less_than_eq_Z, less_than_A);
+ return v1add (cc, v1shli (is_upper, 5));
}
/* There is no strcasechr() defined, but needed for 1 byte case
is 1, and the low 7 bits are all the opposite of the goal byte). */
const uint64_t before_mask = MASK (s_int);
uint64_t v =
- (vec_tolower (*p) | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+ (vec_tolower (*p) | before_mask) ^ (goal & v1shrui (before_mask, 1));
uint64_t zero_matches, goal_matches;
while (1)
{
/* Look for a terminating '\0'. */
- zero_matches = __insn_v1cmpeqi (v, 0);
+ zero_matches = v1cmpeqi (v, 0);
/* Look for the goal byte. */
- goal_matches = __insn_v1cmpeq (v, goal);
+ goal_matches = v1cmpeq (v, goal);
if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
break;
is 1, and the low 7 bits are all the opposite of the goal byte). */
const uint64_t before_mask = MASK (s_int);
uint64_t v =
- (vec_load (p) | before_mask) ^ (byte1 & __insn_v1shrui (before_mask, 1));
+ (vec_load (p) | before_mask) ^ (byte1 & v1shrui (before_mask, 1));
uint64_t zero_matches, goal_matches;
while (1)
{
/* Look for a terminating '\0'. */
- zero_matches = __insn_v1cmpeqi (v, 0);
- uint64_t byte1_matches = __insn_v1cmpeq (v, byte1);
+ zero_matches = v1cmpeqi (v, 0);
+ uint64_t byte1_matches = v1cmpeq (v, byte1);
if (__builtin_expect (zero_matches != 0, 0))
{
/* This is the last vector. Don't worry about matches
back 1 byte to align it with the first byte, then and to
check for both matching. Each vector has a 1 in the LSB
of the byte if there was match. */
- uint64_t byte2_matches = __insn_v1cmpeq (v, byte2);
+ uint64_t byte2_matches = v1cmpeq (v, byte2);
goal_matches = byte1_matches & STRSHIFT (byte2_matches, 8);
break;
}
{
/* 8-bytes starting 1 byte into v. */
v = __insn_dblalign (v, v2, (void*)1);
- uint64_t byte2_matches_shifted = __insn_v1cmpeq (v, byte2);
+ uint64_t byte2_matches_shifted = v1cmpeq (v, byte2);
goal_matches = byte1_matches & byte2_matches_shifted;
if (__builtin_expect (goal_matches != 0, 0))
break;