From 200b5faee1cfac10d831e9b278ef294ca3119f53 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 4 May 2010 09:06:15 -0700 Subject: [PATCH] alpha: fix memchr to not cause memory faults. http://www.mail-archive.com/debian-alpha@lists.debian.org/msg25088.html Signed-off-by: Matt Turner --- ChangeLog.alpha | 5 +++++ sysdeps/alpha/alphaev6/memchr.S | 26 +++++++++++++------------- sysdeps/alpha/memchr.S | 22 +++++++++++----------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/ChangeLog.alpha b/ChangeLog.alpha index c3bbe12..39d5816 100644 --- a/ChangeLog.alpha +++ b/ChangeLog.alpha @@ -1,4 +1,9 @@ 2010-05-03 Aurelien Jarno + + * sysdeps/alpha/memchr.S: Use prefetch load. + * sysdeps/alpha/alphaev6/memchr.S: Likewise. + +2010-05-03 Aurelien Jarno * sysdeps/alpha/dl-machine.h: Add dl-procinfo support. * sysdeps/alpha/dl-procinfo.c: New. diff --git a/sysdeps/alpha/alphaev6/memchr.S b/sysdeps/alpha/alphaev6/memchr.S index 88e91fa..fe77cd8 100644 --- a/sysdeps/alpha/alphaev6/memchr.S +++ b/sysdeps/alpha/alphaev6/memchr.S @@ -127,7 +127,7 @@ $first_quad: cmpbge $31, $1, $2 # E : bne $2, $found_it # U : # At least one byte left to process. - ldq $1, 8($0) # L : + ldq $31, 8($0) # L : subq $5, 1, $18 # E : U L U L addq $0, 8, $0 # E : @@ -143,38 +143,38 @@ $first_quad: and $4, 8, $4 # E : odd number of quads? bne $4, $odd_quad_count # U : # At least three quads remain to be accessed - mov $1, $4 # E : L U L U : move prefetched value to correct reg + nop # E : L U L U : move prefetched value to correct reg .align 4 $unrolled_loop: - ldq $1, 8($0) # L : prefetch $1 - xor $17, $4, $2 # E : - cmpbge $31, $2, $2 # E : - bne $2, $found_it # U : U L U L + ldq $1, 0($0) # L : load quad + xor $17, $1, $2 # E : + ldq $31, 8($0) # L : prefetch next quad + cmpbge $31, $2, $2 # E : U L U L + bne $2, $found_it # U : addq $0, 8, $0 # E : nop # E : nop # E : - nop # E : $odd_quad_count: + ldq $1, 0($0) # L : load quad xor $17, $1, $2 # E : - ldq $4, 8($0) # L : prefetch $4 + ldq $31, 8($0) # L : prefetch $4 cmpbge $31, $2, $2 # E : - addq $0, 8, $6 # E : + addq $0, 8, $6 # E : bne $2, $found_it # U : cmpult $6, $18, $6 # E : addq $0, 8, $0 # E : - nop # E : bne $6, $unrolled_loop # U : - mov $4, $1 # E : move prefetched value into $1 nop # E : nop # E : - -$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do nop # E : + +$final: ldq $1, 0($0) # L : load last quad + subq $5, $0, $18 # E : $18 <- number of bytes left to do nop # E : bne $18, $last_quad # U : diff --git a/sysdeps/alpha/memchr.S b/sysdeps/alpha/memchr.S index 5d713d5..87c7fb1 100644 --- a/sysdeps/alpha/memchr.S +++ b/sysdeps/alpha/memchr.S @@ -119,7 +119,7 @@ $first_quad: # At least one byte left to process. - ldq t0, 8(v0) # e0 : + ldq zero, 8(v0) # e0 : prefetch next quad subq t4, 1, a2 # .. e1 : addq v0, 8, v0 #-e0 : @@ -138,19 +138,19 @@ $first_quad: # At least three quads remain to be accessed - mov t0, t3 # e0 : move prefetched value to correct reg - .align 4 $unrolled_loop: - ldq t0, 8(v0) #-e0 : prefetch t0 - xor a1, t3, t1 # .. e1 : - cmpbge zero, t1, t1 # e0 : - bne t1, $found_it # .. e1 : + ldq t0, 0(v0) # e0 : load quad + xor a1, t0, t1 # .. e1 : + ldq zero, 8(v0) # e0 : prefetch next quad + cmpbge zero, t1, t1 # .. e1: + bne t1, $found_it # e0 : - addq v0, 8, v0 #-e0 : + addq v0, 8, v0 # e1 : $odd_quad_count: + ldq t0, 0(v0) # e0 : load quad xor a1, t0, t1 # .. e1 : - ldq t3, 8(v0) # e0 : prefetch t3 + ldq zero, 8(v0) # e0 : prefetch next quad cmpbge zero, t1, t1 # .. e1 : addq v0, 8, t5 #-e0 : bne t1, $found_it # .. e1 : @@ -159,8 +159,8 @@ $odd_quad_count: addq v0, 8, v0 # .. e1 : bne t5, $unrolled_loop #-e1 : - mov t3, t0 # e0 : move prefetched value into t0 -$final: subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do +$final: ldq t0, 0(v0) # e0 : load last quad + subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do bne a2, $last_quad # e1 : $not_found: -- 2.7.4