1 C -*- mode: asm; asm-comment-char: ?C; -*-
2 C nettle, low-level cryptographics library
4 C Copyright (C) 2002, 2005 Niels Möller
6 C The nettle library is free software; you can redistribute it and/or modify
7 C it under the terms of the GNU Lesser General Public License as published by
8 C the Free Software Foundation; either version 2.1 of the License, or (at your
9 C option) any later version.
11 C The nettle library is distributed in the hope that it will be useful, but
12 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 C License for more details.
16 C You should have received a copy of the GNU Lesser General Public License
17 C along with the nettle library; see the file COPYING.LIB. If not, write to
18 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 C Define to YES to enable the more complex code that special-cases
22 C SRC and DST having compatible alignment.
24 define(<WITH_ALIGN>, <YES>)
C LENGTH is an m4 alias for register %i1.
29 define(<LENGTH>,<%i1>)
43 C Computes the next byte of the key stream. As input, i must
44 C already point to the index for the current access, the index
45 C for the next access is stored in ni. The resulting key byte is stored in res.
47 C ARCFOUR_BYTE(i, ni, res)
48 define(<ARCFOUR_BYTE>, <
62 C FIXME: Consider using the caller's window
C Size in bytes of the stack frame; used as the (negated) adjustment in the
C save instruction at function entry.
63 define(<FRAME_SIZE>, 104)
C Record the source file name for the assembler.
65 .file "arcfour-crypt.asm"
67 C arcfour_crypt(struct arcfour_ctx *ctx,
68 C unsigned length, uint8_t *dst, const uint8_t *src)
75 PROLOGUE(nettle_arcfour_crypt)
C Open a new register window and reserve FRAME_SIZE bytes of stack.
77 save %sp, -FRAME_SIZE, %sp
C Load the unsigned 16-bit halfword at CTX + ARCFOUR_I into I1.
C NOTE(review): a halfword is wider than a single byte index; presumably it
C also covers adjacent state -- confirm against the definition of ARCFOUR_I.
83 lduh [CTX + ARCFOUR_I], I1
87 C We want an even address for DST
C ARCFOUR_BYTE(i, ni, res) takes the current index in i and leaves the next
C index in ni, which is why adjacent invocations below mostly alternate I1
C and I2 in the first two arguments. The key byte always goes to TMP.
95 ARCFOUR_BYTE(I2, I1, TMP)
C Decrement LENGTH by 1 and update the condition codes.
96 subcc LENGTH, 1, LENGTH
107 C Harmless delay slot instruction
113 ARCFOUR_BYTE(I1, I2, TMP)
119 ARCFOUR_BYTE(I2, I1, TMP)
C Decrement LENGTH by 2 and update the condition codes.
121 subcc LENGTH, 2, LENGTH
131 C Harmless delay slot instruction
135 C Main loop, with aligned writes
137 C FIXME: Could check if SRC is aligned, and
138 C use 32-bit reads in that case.
C Four key stream bytes are generated per pass through this unrolled section.
141 ARCFOUR_BYTE(I1, I2, TMP)
146 ARCFOUR_BYTE(I2, I1, TMP)
152 ARCFOUR_BYTE(I1, I2, TMP)
158 ARCFOUR_BYTE(I2, I1, TMP)
C Keep only the low two bits of LENGTH (LENGTH mod 4) and update the
C condition codes.
167 andcc LENGTH, 3, LENGTH
172 C DST address must be 2-aligned
178 ARCFOUR_BYTE(I1, I2, TMP)
184 ARCFOUR_BYTE(I2, I1, TMP)
195 ARCFOUR_BYTE(I2, I1, TMP)
C Store the low 16 bits of I2 to CTX + ARCFOUR_I, the same location that
C was read by the lduh at the top of the function.
203 stuh I2, [CTX + ARCFOUR_I]
209 EPILOGUE(nettle_arcfour_crypt)
211 C Some stats from adriana.lysator.liu.se (SS1000E, 85 MHz), for arcfour
213 C 1: nettle-1.13 C-code
214 C 2: First working version of the assembler code
215 C 3: Moved load of source byte
216 C 4: Better instruction scheduling
217 C 5: Special case SRC and DST with compatible alignment
218 C 6: After bugfix (reorder of ld [CTX+SI+SJ] and st [CTX + SI])
219 C 7: Unrolled only twice, with byte-accesses
220 C 8: Unrolled, using 8-bit reads and aligned 32-bit writes.
222 C MB/s cycles/byte Code size (bytes)