4 Copyright (C) 2013 Niels Möller
6 This file is part of GNU Nettle.
8 GNU Nettle is free software: you can redistribute it and/or
9 modify it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free
18 Software Foundation; either version 2 of the License, or (at your
19 option) any later version.
21 or both in parallel, as here.
23 GNU Nettle is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received copies of the GNU General Public License and
29 the GNU Lesser General Public License along with this program. If
30 not, see http://www.gnu.org/licenses/.
35 C The ldm instruction can load two registers per cycle,
36 C if the address is two-word aligned. Or three registers in two
37 C cycles, regardless of alignment.
54 C memxor(void *dst, const void *src, size_t n)
C Entry point. Going by the formula in the different-alignment path
C below, the routine XORs source words into the destination in place.
C NOTE(review): only part of the body is visible in this excerpt. Notes
C marked "presumably" depend on instructions that are not shown and
C should be confirmed against the complete file.
56 PROLOGUE(nettle_memxor)
C Taken while the preceding test (not visible here) leaves Z clear;
C presumably this repeats the byte-wise step until DST is word aligned.
84 bne .Lmemxor_align_loop
86 C We have at least 4 bytes left to do here.
92 C Different alignment case.
100 C With little-endian, we need to do
101 C DST[i] ^= (SRC[i] >> CNT) ^ (SRC[i+1] << TNC)
C First word of a pair: r4 supplies the right-shifted (earlier) source
C word and r5 the left-shifted (later) one. r3 presumably holds the
C destination word loaded just before -- TODO confirm.
120 eor r3, r3, r4, lsr CNT
121 eor r3, r3, r5, lsl TNC
C Second word of the pair: r4 and r5 swap roles, so r5 is now the
C earlier source word and r4 the later one.
126 eor r3, r3, r5, lsr CNT
127 eor r3, r3, r4, lsl TNC
C Loop again while the flag-setting instruction before this branch (not
C visible here) leaves carry set.
130 bcs .Lmemxor_word_loop
C Taken when Z is set; presumably no bytes remain -- TODO confirm.
132 beq .Lmemxor_odd_done
134 C We have TNC/8 left-over bytes in r4, high end
141 C Store bytes, one by one.
C Keep looping while the preceding flag-setting instruction (not
C visible here) leaves Z clear.
148 bne .Lmemxor_leftover
C Start of the path that uses three-word ldmia/stmia transfers. r14 is
C saved along with r4-r8, r10 and r11 because it is reused as a load
C address below; both pop instructions further down restore this list.
155 push {r4,r5,r6,r7,r8,r10,r11,r14} C lr is the link register
C Taken when carry is clear after the preceding flag-setting
C instruction (not visible here); presumably too few bytes remain for
C a three-word group.
158 bcc .Lmemxor_same_end
C Load three words from SRC; the "!" writes the advanced address back.
160 ldmia SRC!, {r3, r4, r5}
161 C Keep address for loads in r14
C Load three words through r14 (presumably a copy of DST -- TODO
C confirm), advancing r14.
163 ldmia r14!, {r6, r7, r8}
C Carry clear here skips straight to the final store of r10-r12.
168 bcc .Lmemxor_same_final_store
C Load the next three words through r14 ahead of their use.
170 ldmia r14!, {r6, r7, r8}
C Carry clear here skips the main loop and goes to the wind-down code.
171 bcc .Lmemxor_same_wind_down
173 C 6 cycles per iteration, 0.50 cycles/byte. For this speed,
174 C loop starts at offset 0x11c in the object file.
177 C r10-r12 contains values to be stored at DST
178 C r6-r8 contains values read from r14, in advance
C Visible loop steps: load three more words from SRC, store the pending
C r10-r12 group to DST, then load the following three words through r14.
179 ldmia SRC!, {r3, r4, r5}
181 stmia DST!, {r10, r11, r12}
185 ldmia r14!, {r6, r7, r8}
C Repeat while the flag-setting instruction in the loop body (not
C visible here) leaves carry set.
186 bcs .Lmemxor_same_loop
C Target of the bcc above: one more three-word load from SRC and a
C store of the pending r10-r12 group, with no further load through r14
C visible in this part.
188 .Lmemxor_same_wind_down:
190 ldmia SRC!, {r3, r4, r5}
191 stmia DST!, {r10, r11, r12}
C Target of the earlier bcc: store the last pending r10-r12 group.
195 .Lmemxor_same_final_store:
196 stmia DST!, {r10, r11, r12}
199 C We have 0-11 bytes left to do, and N holds number of bytes -12.
C Taken when carry is clear after the preceding flag-setting
C instruction (not visible here); presumably fewer than 8 bytes remain.
201 bcc .Lmemxor_same_lt_8
202 C Do 8 bytes more, leftover is in N
C Restore the registers saved by the push above.
208 pop {r4,r5,r6,r7,r8,r10,r11,r14}
C Same restore as the previous pop, repeated here; presumably a
C separate exit path -- TODO confirm against the full file.
213 pop {r4,r5,r6,r7,r8,r10,r11,r14}
C Taken when carry is clear; presumably fewer than 4 bytes remain.
215 bcc .Lmemxor_same_lt_4
229 EPILOGUE(nettle_memxor)