1 /* Copyright (C) 1996 Free Software Foundation, Inc.
2 Contributed by David Mosberger (davidm@cs.arizona.edu).
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If
18 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
19 Cambridge, MA 02139, USA. */
21 /* Finds characters in a memory area. Optimized for the Alpha architecture:
24 - memory accessed as aligned quadwords only
25 - uses cmpbge to compare 8 bytes in parallel
26 - does binary search to find 0 byte in last
27 quadword (HAKMEM needed 12 instructions to
28 do this instead of the 9 instructions that binary search needs).
31 For correctness consider that:
33 - only minimum number of quadwords may be accessed
34 - the third argument is an unsigned long
# NOTE(review): only part of this routine is visible here, so the notes
# below describe just the instructions shown.
# Register use visible in this span:
#   a0 = address where the search starts (may be misaligned)
#   a1 = byte value searched for, replicated into all eight byte lanes
#   a2 = bytes remaining; later reused as the address of the last quad
#   v0 = quadword address used as the load base in the loop
46 ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned)
48 and a1, 0xff, a1 # a1 = 00000000000000ch (discard all but the low byte)
50 sll a1, 8, t1 # t1 = 000000000000ch00
52 or t1, a1, a1 # a1 = 000000000000chch
53 sll a1, 16, t1 # t1 = 00000000chch0000
55 or t1, a1, a1 # a1 = 00000000chchchch
56 sll a1, 32, t1 # t1 = chchchch00000000
58 or t1, a1, a1 # a1 = chchchchchchchch (search byte in every lane)
64 or t6, t5, t0 # t0 = quadword starting at a0 (t5/t6 are set outside this view)
67 # Deal with the case where at most 8 bytes remain to be searched
70 # t0 = ????c6c5c4c3c2c1
73 srl t2, t5, t5 # t5 = mask of a2 bits set
80 # now, determine which byte matched:
100 # Deal with the case where a2 > 8 bytes remain to be
101 # searched. a0 may not be aligned.
105 insqh t2, a0, t1 # t1 = 0000ffffffffffff (a0<0:2> ff bytes)
107 or t0, t1, t0 # t0 = ====ffffffffffff (force the masked bytes to ff)
111 /* at least one byte left to process */
116 * Make a2 point to last quad to be accessed (the
117 * last quad may or may not be partial).
124 /* at least two quads remain to be accessed */
126 subq a2, v0, t3 # t3 <- a2 - v0, the byte span (8 per quad) to be processed in loop
127 and t3, 8, t3 # bit 3 set => odd number of quads
128 bne t3, odd_quad_count # odd count is handled at odd_quad_count
130 /* at least three quads remain to be accessed */
132 mov t0, t3 # move prefetched value into correct register
136 ldq t0, 8(v0) # prefetch t0 from the quad after v0
144 ldq t3, 8(v0) # prefetch t3 from the quad after v0
150 bne t5, unrolled_loop # repeat while t5 is non-zero
152 mov t3, t0 # move prefetched value into t0
153 final: subq t4, v0, a2 # a2 <- number of bytes left to do