2 Copyright (c) 2013, Raspberry Pi Foundation
3 Copyright (c) 2013, RISC OS Open Ltd
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of the copyright holder nor the
14 names of its contributors may be used to endorse or promote products
15 derived from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
21 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <linux/linkage.h>
32 /* Prevent the stack from becoming executable */
33 #if defined(__linux__) && defined(__ELF__)
34 .section .note.GNU-stack,"",%progbits
/* memcmp_process_head: fetch the next 16 bytes of each buffer.
 * Four words are loaded from S_1 into DAT0-DAT3 and four words from S_2
 * into DAT4-DAT7; the "!" writeback advances each pointer past the data
 * that was read.
 * NOTE(review): how the `unaligned` argument changes the loads is not
 * visible in the lines shown here - confirm against the full macro body. */
44 .macro memcmp_process_head unaligned
51 ldmia S_1!, {DAT0, DAT1, DAT2, DAT3}
53 ldmia S_2!, {DAT4, DAT5, DAT6, DAT7}
/* memcmp_process_tail: takes no macro arguments.
 * NOTE(review): presumably the counterpart of memcmp_process_head that
 * compares the words it loaded - the body is not visible here; confirm. */
56 .macro memcmp_process_tail
/* memcmp_leading_31bytes: conditionally load the leading bytes selected by
 * the low bits of OFF. Each MOVS ... LSL moves two bits of OFF into the
 * N and C flags so that the loads which follow can be predicated on them
 * ("mi" = N set, "cs" = C set). DAT0 is only a scratch destination for
 * the shifts. */
64 .macro memcmp_leading_31bytes
/* LSL #31: N = OFF bit 0 (one byte to load), C = OFF bit 1 (one halfword) */
65 movs DAT0, OFF, lsl #31
66 ldrmib DAT0, [S_1], #1
67 ldrcsh DAT1, [S_1], #2
68 ldrmib DAT4, [S_2], #1
69 ldrcsh DAT5, [S_2], #2
/* LSL #29: N = OFF bit 2, C = OFF bit 3 (two words, i.e. 8 bytes) */
79 movs DAT0, OFF, lsl #29
84 ldmcsia S_2!, {DAT5, DAT6}
/* memcmp_trailing_15bytes: conditionally load the remaining words and
 * halfwords of both buffers. Every load below is predicated on the C flag
 * ("cs") or the N flag ("mi").
 * NOTE(review): the instructions that set those flags are not visible in
 * the lines shown here, and neither are the directives that choose between
 * the LDR pair and the LDM form for S_1 (presumably keyed on `unaligned`) -
 * confirm against the full macro body. */
105 .macro memcmp_trailing_15bytes unaligned
/* "cs": 8 bytes from S_1 as two single-word loads with post-increment */
108 ldrcs DAT0, [S_1], #4
109 ldrcs DAT1, [S_1], #4
/* "cs": the same 8 bytes from S_1 as one load-multiple with writeback */
111 ldmcsia S_1!, {DAT0, DAT1}
/* "mi": one further word from S_1 */
113 ldrmi DAT2, [S_1], #4
/* S_2 side: 8 bytes on "cs", then one word on "mi" */
114 ldmcsia S_2!, {DAT4, DAT5}
115 ldrmi DAT6, [S_2], #4
/* "cs": one halfword from each buffer */
127 ldrcsh DAT0, [S_1], #2
129 ldrcsh DAT4, [S_2], #2
/* memcmp_long_inner_loop: comparison loop for the long case, prefetching
 * ahead of the S_2 read pointer as it goes.
 *   unaligned - handed on to memcmp_process_head and
 *               memcmp_trailing_15bytes
 * NOTE(review): `unaligned` is written without a leading backslash, which
 * only substitutes the macro argument in alternate macro mode (.altmacro);
 * that directive is not visible in the lines shown here - confirm. */
140 .macro memcmp_long_inner_loop unaligned
142 memcmp_process_head unaligned
/* Prefetch S_2 at prefetch_distance 32-byte blocks plus 16 bytes ahead */
143 pld [S_2, #prefetch_distance*32 + 16]
145 memcmp_process_head unaligned
150 /* Just before the final (prefetch_distance+1) 32-byte blocks,
151 * deal with final preloads */
152 preload_trailing 0, S_1, N, DAT0
153 preload_trailing 0, S_2, N, DAT0
/* Add back all but 16 of the (prefetch_distance+2)*32 bias; presumably this
 * undoes the `sub N, N, #(prefetch_distance+2)*32` done before the loop */
154 add N, N, #(prefetch_distance+2)*32 - 16
156 memcmp_process_head unaligned
160 /* Trailing words and bytes */
163 memcmp_trailing_15bytes unaligned
164 199: /* Reached end without detecting a difference */
/* memcmp_short_inner_loop: comparison loop for the short case; unlike the
 * long loop, no prefetch instructions appear in the lines shown here.
 *   unaligned - handed on to memcmp_process_head and
 *               memcmp_trailing_15bytes
 * N is biased by -16 on entry because each memcmp_process_head invocation
 * loads 16 bytes per buffer.
 * NOTE(review): the compare and branch instructions of the loop are not
 * visible in the lines shown here - confirm against the full macro body. */
170 .macro memcmp_short_inner_loop unaligned
171 subs N, N, #16 /* simplifies inner loop termination */
174 memcmp_process_head unaligned
178 122: /* Trailing words and bytes */
181 memcmp_trailing_15bytes unaligned
182 199: /* Reached end without detecting a difference */
189 * int memcmp(const void *s1, const void *s2, size_t n);
191 * a1 = pointer to buffer 1
192 * a2 = pointer to buffer 2
193 * a3 = number of bytes to compare (as unsigned chars)
195 * a1 = >0/=0/<0 if s1 >/=/< s2
/* Prefetch distance, counted in 32-byte blocks: every use of this symbol
 * in the file multiplies it by 32. */
198 .set prefetch_distance, 2
/* NOTE(review): the matching switch back to little-endian is not visible
 * in the lines shown here - confirm that every exit path restores it. */
215 setend be /* lowest-addressed bytes are most significant */
217 /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
/* The threshold is (prefetch_distance+3)*32 - 1, i.e. 159 with the value
 * set above: (prefetch_distance+2) whole blocks plus 31 bytes, presumably
 * to allow for the bytes consumed by memcmp_leading_31bytes. The branch
 * that acts on this comparison is not visible in the lines shown here. */
218 cmp N, #(prefetch_distance+3)*32 - 1
222 /* Adjust N so that the decrement instruction can also test for
223 * inner loop termination. We want it to stop when there are
224 * (prefetch_distance+1) complete blocks to go. */
225 sub N, N, #(prefetch_distance+2)*32
/* The preload_* macros are defined outside the lines shown here; they are
 * invoked once per buffer, with DAT0 paired with S_1 and DAT1 with S_2. */
226 preload_leading_step1 0, DAT0, S_1
227 preload_leading_step1 0, DAT1, S_2
/* OFF = 0 - S_2: its low bits give the number of bytes needed to bring S_2
 * up to the next aligned boundary. */
230 rsb OFF, S_2, #0 /* no need to AND with 15 here */
231 preload_leading_step2 0, DAT0, S_1, OFF, DAT2
232 preload_leading_step2 0, DAT1, S_2, OFF, DAT2
/* Load the leading bytes selected by the low bits of OFF */
233 memcmp_leading_31bytes
234 154: /* Second source now cacheline (32-byte) aligned; we have at
235 * least one prefetch to go. */
236 /* Prefetch offset is best selected such that it lies in the
237 * first 8 of each 32 bytes - but it's just as easy to aim for
238 * the first one */
/* OFF = 32*prefetch_distance - OFF */
240 rsb OFF, OFF, #32*prefetch_distance
/* The long loop is instantiated twice, with unaligned = 0 and unaligned = 1.
 * The test and branch that choose between the two copies are not visible
 * in the lines shown here. Label 140 is a numeric local label, so defining
 * it again for the short case further down is legal. */
243 memcmp_long_inner_loop 0
244 140: memcmp_long_inner_loop 1
246 170: /* Short case */
/* preload_all is defined outside the lines shown here; it is invoked once
 * for each buffer with the same length N and scratch registers. */
249 preload_all 0, 0, 0, S_1, N, DAT0, DAT1
250 preload_all 0, 0, 0, S_2, N, DAT0, DAT1
261 174: /* Second source now 4-byte aligned; we have 0 or more bytes to go */
/* As for the long case: one copy of the loop for each value of unaligned */
264 memcmp_short_inner_loop 0
265 140: memcmp_short_inner_loop 1
267 200: /* Difference found: determine sign. */