1 C sparc32/arcfour-crypt.asm
4 Copyright (C) 2002, 2005 Niels Möller
6 This file is part of GNU Nettle.
8 GNU Nettle is free software: you can redistribute it and/or
9 modify it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free
18 Software Foundation; either version 2 of the License, or (at your
19 option) any later version.
21 or both in parallel, as here.
23 GNU Nettle is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received copies of the GNU General Public License and
29 the GNU Lesser General Public License along with this program. If
30 not, see http://www.gnu.org/licenses/.
33 C Define to YES to enable the more complex code that special-cases
34 C SRC and DST with compatible alignment.
36 define(<WITH_ALIGN>, <YES>)
C LENGTH is the m4 name for register %i1. The prototype comment further
C down lists length as the second argument of the function.
41 define(<LENGTH>,<%i1>)
55 C Computes the next byte of the key stream. As input, i must
56 C already point to the index for the current access; the index
57 C for the next access is stored in ni. The resulting key byte is stored in res.
59 C ARCFOUR_BYTE(i, ni, res)
60 define(<ARCFOUR_BYTE>, <
74 C FIXME: Consider using the caller's window
C Stack frame size in bytes; its negation is the offset given to the
C save instruction at function entry.
75 define(<FRAME_SIZE>, 104)
C Record the source file name in the assembler output.
77 .file "arcfour-crypt.asm"
79 C arcfour_crypt(struct arcfour_ctx *ctx,
80 C size_t length, uint8_t *dst, const uint8_t *src)
C Entry point nettle_arcfour_crypt. PROLOGUE and EPILOGUE are macros
C defined outside this file.
C NOTE(review): only some of the routine's instructions (and none of its
C labels or branches) appear in this listing, so the notes below describe
C the visible lines only.
87 PROLOGUE(nettle_arcfour_crypt)
C Open a new register window and reserve FRAME_SIZE bytes of stack.
89 save %sp, -FRAME_SIZE, %sp
C Read an unsigned 16-bit halfword at CTX + ARCFOUR_I into I1.
C Presumably this fetches the i and j state bytes together -- TODO
C confirm against the context layout.
95 lduh [CTX + ARCFOUR_I], I1
99 C We want an even address for DST
C Generate one key stream byte: I2 is the current index, I1 receives
C the index for the next access.
107 ARCFOUR_BYTE(I2, I1, TMP)
C Decrement LENGTH by one; subcc also updates the condition codes.
108 subcc LENGTH, 1, LENGTH
119 C Harmless delay slot instruction
C The two index registers swap roles on successive calls: the next
C index produced by one call is the current index of the following one.
125 ARCFOUR_BYTE(I1, I2, TMP)
131 ARCFOUR_BYTE(I2, I1, TMP)
C Decrement LENGTH by two; subcc also updates the condition codes.
133 subcc LENGTH, 2, LENGTH
143 C Harmless delay slot instruction
147 C Main loop, with aligned writes
149 C FIXME: Could check if SRC is aligned, and
150 C use 32-bit reads in that case.
C NOTE(review): the four ARCFOUR_BYTE calls below presumably make up
C one unrolled loop iteration; the loop label and branch are not
C visible here.
153 ARCFOUR_BYTE(I1, I2, TMP)
158 ARCFOUR_BYTE(I2, I1, TMP)
164 ARCFOUR_BYTE(I1, I2, TMP)
170 ARCFOUR_BYTE(I2, I1, TMP)
C Reduce LENGTH to its low two bits (LENGTH mod 4) and update the
C condition codes.
179 andcc LENGTH, 3, LENGTH
184 C DST address must be 2-aligned
190 ARCFOUR_BYTE(I1, I2, TMP)
196 ARCFOUR_BYTE(I2, I1, TMP)
207 ARCFOUR_BYTE(I2, I1, TMP)
C Write the halfword in I2 back to CTX + ARCFOUR_I, the same location
C that was read at entry.
215 stuh I2, [CTX + ARCFOUR_I]
221 EPILOGUE(nettle_arcfour_crypt)
223 C Some stats from adriana.lysator.liu.se (SS1000E, 85 MHz), for ARCFOUR
225 C 1: nettle-1.13 C-code
226 C 2: First working version of the assembler code
227 C 3: Moved load of source byte
228 C 4: Better instruction scheduling
229 C 5: Special case SRC and DST with compatible alignment
230 C 6: After bugfix (reorder of ld [CTX+SI+SJ] and st [CTX + SI])
231 C 7: Unrolled only twice, with byte-accesses
232 C 8: Unrolled, using 8-bit reads and aligned 32-bit writes.
234 C MB/s cycles/byte Code size (bytes)