1 C sparc32/aes-encrypt-internal.asm
4 Copyright (C) 2002, 2005, 2013 Niels Möller
6 This file is part of GNU Nettle.
8 GNU Nettle is free software: you can redistribute it and/or
9 modify it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free
18 Software Foundation; either version 2 of the License, or (at your
19 option) any later version.
21 or both in parallel, as here.
23 GNU Nettle is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received copies of the GNU General Public License and
29 the GNU Lesser General Public License along with this program. If
30 not, see http://www.gnu.org/licenses/.
33 include_src(<sparc32/aes.m4>)
C Incoming argument registers, in the order of the C prototype of
C _aes_encrypt: %i0 holds the first argument (rounds) and %i3 the
C fourth (length).
36 define(<ROUNDS>,<%i0>)
39 define(<LENGTH>,<%i3>)
43 C AES state, two copies for unrolling
55 C %o0-%o3 are used for loop invariants T0-T3
C Counter register; it is decremented with subcc after each unrolled
C pair of AES_ROUND groups in the function body.
57 define(<COUNT>, <%o5>)
59 C %g1, %g2, %g3 are TMP1, TMP2 and TMP3
61 C I'm still slightly confused by the frame layout, specified in
62 C "SYSTEM V APPLICATION BINARY INTERFACE SPARC Processor Supplement".
63 C However, Sun's cc generates a 104 byte stack frame for a function
64 C with no local variables, so that should be good enough for us too.
66 C The sparc32 stack frame looks like
68 C %fp - 4: OS-dependent link field
69 C %fp - 8: OS-dependent link field
70 C %fp - 104: OS register save area
C Stack frame size in bytes; used as the (negated) operand of the
C save instruction at function entry.
71 define(<FRAME_SIZE>, 104)
73 .file "aes-encrypt-internal.asm"
75 C _aes_encrypt(unsigned rounds, const uint32_t *keys,
76 C const struct aes_table *T,
77 C size_t length, uint8_t *dst,
C Body of _nettle_aes_encrypt. PROLOGUE and EPILOGUE, as well as
C AES_LOAD, AES_ROUND and AES_FINAL_ROUND, are macros that are not
C defined in this file.
84 PROLOGUE(_nettle_aes_encrypt)
C Open a new register window and reserve FRAME_SIZE bytes of stack.
86 save %sp, -FRAME_SIZE, %sp
96 C Must be even, and includes the final round
98 C Last two rounds handled specially
102 C Read src, and add initial subkey
C One AES_LOAD per state word: words 0-3 are placed in W0-W3.
104 AES_LOAD(0, SRC, KEY, W0)
105 AES_LOAD(1, SRC, KEY, W1)
106 AES_LOAD(2, SRC, KEY, W2)
107 AES_LOAD(3, SRC, KEY, W3)
114 C The AES_ROUND macro uses T0,... T3
C First half of the unrolled pair: reads W0-W3 and writes X0-X3.
C The four input registers are rotated by one position for each
C output word.
116 AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
117 AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
118 AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
119 AES_ROUND(3, W3, W0, W1, W2, KEY, X3)
C Second half: reads X0-X3 and writes the result back to W0-W3.
C The first argument continues with 4-7; presumably it selects the
C subkey word relative to KEY (see sparc32/aes.m4) -- TODO confirm.
122 AES_ROUND(4, X0, X1, X2, X3, KEY, W0)
123 AES_ROUND(5, X1, X2, X3, X0, KEY, W1)
124 AES_ROUND(6, X2, X3, X0, X1, KEY, W2)
125 AES_ROUND(7, X3, X0, X1, X2, KEY, W3)
C Decrement COUNT; subcc also updates the condition codes.
127 subcc COUNT, 1, COUNT
C Next-to-last round (one of the two rounds handled specially):
C reads W0-W3 and writes X0-X3.
132 AES_ROUND(0, W0, W1, W2, W3, KEY, X0)
133 AES_ROUND(1, W1, W2, W3, W0, KEY, X1)
134 AES_ROUND(2, W2, W3, W0, W1, KEY, X2)
135 AES_ROUND(3, W3, W0, W1, W2, KEY, X3)
C Final round: each invocation is passed the table pointer T, the
C rotated X0-X3 registers, KEY and DST.
139 AES_FINAL_ROUND(0, T, X0, X1, X2, X3, KEY, DST)
140 AES_FINAL_ROUND(1, T, X1, X2, X3, X0, KEY, DST)
141 AES_FINAL_ROUND(2, T, X2, X3, X0, X1, KEY, DST)
142 AES_FINAL_ROUND(3, T, X3, X0, X1, X2, KEY, DST)
C Subtract 16 bytes from LENGTH; subcc also updates the condition
C codes.
144 subcc LENGTH, 16, LENGTH
151 EPILOGUE(_nettle_aes_encrypt)
153 C Some stats from adriana.lysator.liu.se (SS1000E, 85 MHz), for AES 128
155 C 1: nettle-1.13 C-code
156 C 2: nettle-1.13 assembler
158 C 4: New assembler, first correct version
159 C 5: New assembler, with basic scheduling of AES_ROUND.
160 C 6: New assembler, with loop invariants T0-T3.
161 C 7: New assembler, with basic scheduling also of AES_FINAL_ROUND.
163 C MB/s cycles/block Code size (bytes)