2 ---------------------------------------------------------------------------
3 Copyright (c) 2002, Dr Brian Gladman < >, Worcester, UK.
8 The free distribution and use of this software in both source and binary
9 form is allowed (with or without changes) provided that:
11 1. distributions of this source code include the above copyright
12 notice, this list of conditions and the following disclaimer;
14 2. distributions in binary form include the above copyright
15 notice, this list of conditions and the following disclaimer
16 in the documentation and/or other associated materials;
18 3. the copyright holder's name is not used to endorse products
19 built using this software without specific written permission.
21 ALTERNATIVELY, provided that this notice is retained in full, this product
22 may be distributed under the terms of the GNU General Public License (GPL),
23 in which case the provisions of the GPL apply INSTEAD OF those given above.
27 This software is provided 'as is' with no explicit or implied warranties
28 in respect of its properties, including, but not limited to, correctness
29 and/or fitness for purpose.
30 ---------------------------------------------------------------------------
31 Issue Date: 24/01/2003
33 This file contains the code for implementing encryption and decryption
34 for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It
35 can optionally be replaced by code written in assembler using NASM.
/* Compiled only when building as C++. NOTE(review): the guarded lines and
   the matching #endif are not visible in this excerpt. */
40 #if defined(__cplusplus)
/* Reject a fixed BLOCK_SIZE that has any of its low three bits set, i.e.
   one that is not a multiple of 8. */
45 #if defined(BLOCK_SIZE) && (BLOCK_SIZE & 7)
46 #error An illegal block size has been specified.
/* Placeholder value. NOTE(review): presumably substituted for state-variable
   names that the round macros generate but never use; the defines that
   consume it are not visible in this excerpt - confirm. */
49 #define unused 77 /* Sunset Strip */
/* si: load column c of state y. Reads a word from byte buffer x at offset
   4 * c with word_in() and XORs it with key word k[c].
   so: store column c of state x to byte buffer y at offset 4 * c with
   word_out().
   The buffer, key and column arguments are parenthesised so that callers
   may pass expressions (for example kp + nc as the key pointer), matching
   the (k)[c] form already used by fwd_rnd and inv_rnd. The column argument
   is passed to s() unparenthesised because s() is defined elsewhere and
   may paste it into a variable name. */
#define si(y,x,k,c) (s(y,c) = word_in((x) + 4 * (c)) ^ (k)[c])
#define so(y,x,c) word_out((y) + 4 * (c), s(x,c))
/* Declarator list for the two state buffers x and y when each is held as a
   4-element array. */
57 #define locals(y,x) x[4],y[4]
/* Alternative declarator list in which each state is held in individually
   named variables (x0..x3, y0..y3) built by token pasting.
   NOTE(review): the conditional that selects between the two definitions is
   not visible in this excerpt. */
59 #define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
61 /* The following defines stop the compiler from requiring declarations for
62 variables that the fwd_var and inv_var macros generate but never use. */
/* Helpers for a 4-column state. Each expands to one statement per column,
   separated by semicolons, so an invocation is only safe as a complete
   statement inside a braced block (not as the body of an unbraced if/for). */
/* Copy columns 0..3 of state x into state y. */
73 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
74 s(y,2) = s(x,2); s(y,3) = s(x,3);
/* Load columns 0..3 of state y from byte buffer x, combining each with the
   matching key word from k (see si). */
75 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
/* Store columns 0..3 of state x to byte buffer y (see so). */
76 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
/* Apply the per-column round macro rm to columns 0..3, writing state y from
   state x with key pointer k. */
77 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
/* 6-column versions of locals, l_copy, state_in, state_out and round, used
   when BLOCK_SIZE is 24. They follow the same pattern as the 4-column
   versions: one declarator or one statement per column, for columns 0..5.
   NOTE(review): the conditional that selects between the two locals
   definitions is not visible in this excerpt. */
79 #elif BLOCK_SIZE == 24
82 #define locals(y,x) x[6],y[6]
84 #define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \
85 y##0,y##1,y##2,y##3,y##4,y##5
91 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
92 s(y,2) = s(x,2); s(y,3) = s(x,3); \
93 s(y,4) = s(x,4); s(y,5) = s(x,5);
94 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
95 si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
96 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
97 so(y,x,3); so(y,x,4); so(y,x,5)
98 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
99 rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
/* 8-column versions of locals, l_copy, state_in, state_out and round: one
   declarator or one statement per column, for columns 0..7.
   NOTE(review): the directives that select this group, and that choose
   between the two locals definitions, are not visible in this excerpt. */
103 #define locals(y,x) x[8],y[8]
105 #define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
106 y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
108 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
109 s(y,2) = s(x,2); s(y,3) = s(x,3); \
110 s(y,4) = s(x,4); s(y,5) = s(x,5); \
111 s(y,6) = s(x,6); s(y,7) = s(x,7);
115 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
116 si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
117 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
118 so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
119 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
120 rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
/* Versions of state_in, state_out and round built from case 8 / case 6 /
   case 4 labels instead of a fixed column count.
   In state_in, state_out and the second round definition the visible case
   bodies follow one another directly, so case 8 handles columns 7..0,
   case 6 columns 5..0 and case 4 columns 3..0 by falling through.
   The first round definition (under FAST_VARIABLE) instead lists every
   column explicitly within each case.
   NOTE(review): the switch headers, closing braces, any break statements
   and the #else/#endif lines are not visible in this excerpt, so the exact
   control flow should be confirmed against the complete file. */
123 #define state_in(y,x,k) \
125 { case 8: si(y,x,k,7); si(y,x,k,6); \
126 case 6: si(y,x,k,5); si(y,x,k,4); \
127 case 4: si(y,x,k,3); si(y,x,k,2); \
128 si(y,x,k,1); si(y,x,k,0); \
131 #define state_out(y,x) \
133 { case 8: so(y,x,7); so(y,x,6); \
134 case 6: so(y,x,5); so(y,x,4); \
135 case 4: so(y,x,3); so(y,x,2); \
136 so(y,x,1); so(y,x,0); \
139 #if defined(FAST_VARIABLE)
141 #define round(rm,y,x,k) \
143 { case 8: rm(y,x,k,7); rm(y,x,k,6); \
144 rm(y,x,k,5); rm(y,x,k,4); \
145 rm(y,x,k,3); rm(y,x,k,2); \
146 rm(y,x,k,1); rm(y,x,k,0); \
148 case 6: rm(y,x,k,5); rm(y,x,k,4); \
149 rm(y,x,k,3); rm(y,x,k,2); \
150 rm(y,x,k,1); rm(y,x,k,0); \
152 case 4: rm(y,x,k,3); rm(y,x,k,2); \
153 rm(y,x,k,1); rm(y,x,k,0); \
158 #define round(rm,y,x,k) \
160 { case 8: rm(y,x,k,7); rm(y,x,k,6); \
161 case 6: rm(y,x,k,5); rm(y,x,k,4); \
162 case 4: rm(y,x,k,3); rm(y,x,k,2); \
163 rm(y,x,k,1); rm(y,x,k,0); \
171 #if defined(ENCRYPTION) && !defined(AES_ASM)
173 /* Given the column (c) of the output state variable, the following
174 macros give the input state variables which are needed in its
175 computation for each row (r) of the state. All the alternative
176 macros give the same end values but expand into different ways
177 of calculating these values. In particular, the complex macro
178 used for dynamically variable block sizes is designed to expand
179 to a compile-time constant whenever possible but will expand to
180 conditional clauses on some branches (I am grateful to Frank
181 Yellin for this construction). */
/* fwd_var(x,r,c): the column of input state x that supplies row r of output
   column c in a forward round. Three alternative definitions follow.
   NOTE(review): the directives that choose between them are only partly
   visible in this excerpt. */
184 #if defined(BLOCK_SIZE)
/* Column (r + c) mod nc. */
186 # define fwd_var(x,r,c) s(x,((r+c)%nc))
/* As above, but rows 2 and 3 step one extra column when nc > 9 - r, i.e.
   when nc > 7 for row 2 and when nc > 6 for row 3. */
188 #define fwd_var(x,r,c) s(x,(r+c+(((r>1)&&(nc>9-r))?1:0))%nc)
/* Table form: nested conditionals on c and nc that name the source column
   directly. NOTE(review): the row (r) selectors and several table entries
   are not visible in this excerpt. */
191 #define fwd_var(x,r,c)\
197 : c == 3 ? nc == 4 ? s(x,0) : s(x,4) \
199 : c == 5 ? nc == 8 ? s(x,6) : s(x,0) \
200 : c == 6 ? s(x,7) : s(x,0)) \
202 ( c == 0 ? nc == 8 ? s(x,3) : s(x,2) \
203 : c == 1 ? nc == 8 ? s(x,4) : s(x,3) \
204 : c == 2 ? nc == 4 ? s(x,0) : nc == 8 ? s(x,5) : s(x,4) \
205 : c == 3 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,6) : s(x,5) \
206 : c == 4 ? nc == 8 ? s(x,7) : s(x,0) \
207 : c == 5 ? nc == 8 ? s(x,0) : s(x,1) \
208 : c == 6 ? s(x,1) : s(x,2)) \
210 ( c == 0 ? nc == 8 ? s(x,4) : s(x,3) \
211 : c == 1 ? nc == 4 ? s(x,0) : nc == 8 ? s(x,5) : s(x,4) \
212 : c == 2 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,6) : s(x,5) \
213 : c == 3 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,7) : s(x,0) \
214 : c == 4 ? nc == 8 ? s(x,0) : s(x,1) \
215 : c == 5 ? nc == 8 ? s(x,1) : s(x,2) \
216 : c == 6 ? s(x,2) : s(x,3)))
/* fwd_rnd(y,x,k,c): compute column c of state y for a forward round from
   state x and the key words at k. Three alternative definitions follow:
   a four_tables lookup, a one_table lookup, and a no_table lookup passed
   through fwd_mcol(). In every form the result is XORed with (k)[c].
   NOTE(review): the opening #if and the #else/#endif lines of this chain
   are not visible in this excerpt. */
222 #define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
223 #elif defined(FT1_SET)
226 #define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
228 #define fwd_rnd(y,x,k,c) (s(y,c) = fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)) ^ (k)[c])
/* fwd_lrnd(y,x,k,c): the same computation for the round that the block
   function applies last. It uses the (f,l) tables, and its no_table form
   omits fwd_mcol(). */
232 #define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
233 #elif defined(FL1_SET)
234 #define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
236 #define fwd_lrnd(y,x,k,c) (s(y,c) = no_table(x,t_use(s,box),fwd_var,rf1,c) ^ (k)[c])
/* Encrypt one block.
     in_blk   input bytes, read by state_in()
     out_blk  output bytes, written by state_out()
     cx       context supplying the key schedule (k_sch), the round count
              (n_rnd) and the n_blk flag word
   Returns aes_bad when bit 0 of cx->n_blk is clear.
   NOTE(review): the success return, the closing brace and a number of
   brace and preprocessor lines are not visible in this excerpt. */
239 INTERNAL aes_rval aes_encrypt_block(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
240 { aes_32t locals(b0, b1);
241 const aes_32t *kp = cx->k_sch;
242 dec_fmvars /* declare variables for fwd_mcol() if needed */
/* Refuse to run unless bit 0 of n_blk is set. NOTE(review): presumably this
   bit marks an encryption key schedule as available - confirm against the
   key set-up code. */
244 if(!(cx->n_blk & 1)) return aes_bad;
/* Load the input block into b0, XORing each word with the key words at the
   start of the schedule. */
246 state_in(b0, in_blk, kp);
/* Fully unrolled form. After the adjustment below, kp + 9 * nc equals
   k_sch + n_rnd * nc, so the last ten rounds use fixed offsets from kp
   whatever the round count is. */
248 #if (ENC_UNROLL == FULL)
250 kp += (cx->n_rnd - 9) * nc;
252 /*lint -e{616} control flows into case/default */
/* NOTE(review): the lint remark above points to a switch with fall-through;
   presumably its case labels (not visible here) enter this sequence earlier
   for larger round counts. Rounds alternate between b1 and b0 as output. */
256 round(fwd_rnd, b1, b0, kp - 4 * nc);
257 round(fwd_rnd, b0, b1, kp - 3 * nc);
260 round(fwd_rnd, b1, b0, kp - 2 * nc);
261 round(fwd_rnd, b0, b1, kp - nc);
264 round(fwd_rnd, b1, b0, kp );
265 round(fwd_rnd, b0, b1, kp + nc);
266 round(fwd_rnd, b1, b0, kp + 2 * nc);
267 round(fwd_rnd, b0, b1, kp + 3 * nc);
268 round(fwd_rnd, b1, b0, kp + 4 * nc);
269 round(fwd_rnd, b0, b1, kp + 5 * nc);
270 round(fwd_rnd, b1, b0, kp + 6 * nc);
271 round(fwd_rnd, b0, b1, kp + 7 * nc);
272 round(fwd_rnd, b1, b0, kp + 8 * nc);
/* The last round uses fwd_lrnd and leaves the result in b0. */
273 round(fwd_lrnd, b0, b1, kp + 9 * nc);
/* Partially unrolled form: the visible lines show a b0-to-b1 round and a
   b1-to-b0 round inside the loop, followed by one more b0-to-b1 round.
   NOTE(review): the loop braces and the kp updates are not visible here. */
280 #if (ENC_UNROLL == PARTIAL)
282 for(rnd = 0; rnd < (cx->n_rnd >> 1) - 1; ++rnd)
285 round(fwd_rnd, b1, b0, kp);
287 round(fwd_rnd, b0, b1, kp);
290 round(fwd_rnd, b1, b0, kp);
/* Rolled form: one round per iteration from p0 into p1, after which the two
   buffer pointers are swapped. */
292 { aes_32t rnd, *p0 = b0, *p1 = b1, *pt;
293 for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd)
296 round(fwd_rnd, p1, p0, kp);
297 pt = p0, p0 = p1, p1 = pt;
/* Last round for the looped forms, leaving the result in b0. */
301 round(fwd_lrnd, b0, b1, kp);
/* Write the result held in b0 to the output buffer. */
305 state_out(out_blk, b0);
311 #if defined(DECRYPTION) && !defined(AES_ASM)
313 /* Given the column (c) of the output state variable, the following
314 macros give the input state variables which are needed in its
315 computation for each row (r) of the state. All the alternative
316 macros give the same end values but expand into different ways
317 of calculating these values. In particular, the complex macro
318 used for dynamically variable block sizes is designed to expand
319 to a compile-time constant whenever possible but will expand to
320 conditional clauses on some branches (I am grateful to Frank
321 Yellin for this construction). */
/* inv_var(x,r,c): the column of input state x that supplies row r of output
   column c in an inverse round. Three alternative definitions follow.
   NOTE(review): the directives that choose between them are only partly
   visible in this excerpt. */
324 #if defined(BLOCK_SIZE)
/* Column (4 + c - r) mod nc. The bias of 4 keeps the dividend non-negative,
   but it leaves the result unchanged modulo nc only when nc divides 4.
   NOTE(review): confirm that the condition guarding this form (not visible
   here) restricts it to nc == 4. */
326 #define inv_var(x,r,c) s(x,((4+c-r)%nc))
/* General form: rows 2 and 3 step back one extra column when nc > 9 - r.
   840 is a multiple of 4, 5, 6, 7 and 8, so adding it keeps the dividend
   non-negative without changing the result modulo any of those values. */
328 #define inv_var(x,r,c) s(x,(840+c-r-(((r>1)&&(nc>9-r))?1:0))%nc)
/* Table form: nested conditionals on c and nc that name the source column
   directly. NOTE(review): the row (r) selectors and several table entries
   are not visible in this excerpt. */
331 #define inv_var(x,r,c)\
334 ( c == 0 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,7) : s(x,5) \
340 : c == 6 ? s(x,5) : s(x,6)) \
342 ( c == 0 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,5) : s(x,4) \
343 : c == 1 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,6) : s(x,5) \
344 : c == 2 ? nc == 8 ? s(x,7) : s(x,0) \
345 : c == 3 ? nc == 8 ? s(x,0) : s(x,1) \
346 : c == 4 ? nc == 8 ? s(x,1) : s(x,2) \
347 : c == 5 ? nc == 8 ? s(x,2) : s(x,3) \
348 : c == 6 ? s(x,3) : s(x,4)) \
350 ( c == 0 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,4) : s(x,3) \
351 : c == 1 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,5) : s(x,4) \
352 : c == 2 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,6) : s(x,5) \
353 : c == 3 ? nc == 8 ? s(x,7) : s(x,0) \
354 : c == 4 ? nc == 8 ? s(x,0) : s(x,1) \
355 : c == 5 ? nc == 8 ? s(x,1) : s(x,2) \
356 : c == 6 ? s(x,2) : s(x,3)))
/* inv_rnd(y,x,k,c): compute column c of state y for an inverse round from
   state x and the key words at k. Three alternative definitions follow:
   a four_tables lookup, a one_table lookup, and a no_table form. In the
   no_table form the key word is XORed in before inv_mcol() is applied,
   whereas the table forms XOR the key word with the lookup result.
   NOTE(review): the opening #if and the #else/#endif lines of this chain
   are not visible in this excerpt. */
362 #define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
363 #elif defined(IT1_SET)
366 #define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
368 #define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol(no_table(x,t_use(i,box),inv_var,rf1,c) ^ (k)[c]))
/* inv_lrnd(y,x,k,c): the same computation for the round that the block
   function applies last. It uses the (i,l) tables, and its no_table form
   omits inv_mcol(). */
372 #define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
373 #elif defined(IL1_SET)
374 #define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
376 #define inv_lrnd(y,x,k,c) (s(y,c) = no_table(x,t_use(i,box),inv_var,rf1,c) ^ (k)[c])
/* Decrypt one block.
     in_blk   input bytes, read by state_in()
     out_blk  output bytes, written by state_out()
     cx       context supplying the key schedule (k_sch), the round count
              (n_rnd) and the n_blk flag word
   Returns aes_bad when bit 1 of cx->n_blk is clear.
   NOTE(review): the success return, the closing brace and a number of
   brace and preprocessor lines are not visible in this excerpt. */
379 INTERNAL aes_rval aes_decrypt_block(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
380 { aes_32t locals(b0, b1);
/* Start at the key words nc * n_rnd entries into the schedule; the rounds
   below then step back towards k_sch. */
381 const aes_32t *kp = cx->k_sch + nc * cx->n_rnd;
382 dec_imvars /* declare variables for inv_mcol() if needed */
/* Refuse to run unless bit 1 of n_blk is set. NOTE(review): presumably this
   bit marks a decryption key schedule as available - confirm against the
   key set-up code. */
384 if(!(cx->n_blk & 2)) return aes_bad;
/* Load the input block into b0, XORing each word with the key words at kp. */
386 state_in(b0, in_blk, kp);
/* Fully unrolled form. kp is re-based to k_sch + 9 * nc, so the final
   inv_lrnd at kp - 9 * nc uses the key words at k_sch itself. */
388 #if (DEC_UNROLL == FULL)
390 kp = cx->k_sch + 9 * nc;
392 /*lint -e{616} control flows into case/default */
/* NOTE(review): the lint remark above points to a switch with fall-through;
   presumably its case labels (not visible here) enter this sequence earlier
   for larger round counts. Rounds alternate between b1 and b0 as output. */
396 round(inv_rnd, b1, b0, kp + 4 * nc);
397 round(inv_rnd, b0, b1, kp + 3 * nc);
399 round(inv_rnd, b1, b0, kp + 2 * nc);
400 round(inv_rnd, b0, b1, kp + nc );
402 round(inv_rnd, b1, b0, kp );
403 round(inv_rnd, b0, b1, kp - nc);
404 round(inv_rnd, b1, b0, kp - 2 * nc);
405 round(inv_rnd, b0, b1, kp - 3 * nc);
406 round(inv_rnd, b1, b0, kp - 4 * nc);
407 round(inv_rnd, b0, b1, kp - 5 * nc);
408 round(inv_rnd, b1, b0, kp - 6 * nc);
409 round(inv_rnd, b0, b1, kp - 7 * nc);
410 round(inv_rnd, b1, b0, kp - 8 * nc);
/* The last round uses inv_lrnd and leaves the result in b0. */
411 round(inv_lrnd, b0, b1, kp - 9 * nc);
/* Partially unrolled form: the visible lines show a b0-to-b1 round and a
   b1-to-b0 round inside the loop, followed by one more b0-to-b1 round.
   NOTE(review): the loop braces and the kp updates are not visible here. */
417 #if (DEC_UNROLL == PARTIAL)
419 for(rnd = 0; rnd < (cx->n_rnd >> 1) - 1; ++rnd)
422 round(inv_rnd, b1, b0, kp);
424 round(inv_rnd, b0, b1, kp);
427 round(inv_rnd, b1, b0, kp);
/* Rolled form: one round per iteration from p0 into p1, after which the two
   buffer pointers are swapped. */
429 { aes_32t rnd, *p0 = b0, *p1 = b1, *pt;
430 for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd)
433 round(inv_rnd, p1, p0, kp);
434 pt = p0, p0 = p1, p1 = pt;
/* Last round for the looped forms, leaving the result in b0. */
438 round(inv_lrnd, b0, b1, kp);
/* Write the result held in b0 to the output buffer. */
442 state_out(out_blk, b0);
448 #if defined(__cplusplus)