3 The twofish block cipher.
5 Copyright (C) 2001, 2014 Niels Möller
6 Copyright (C) 1999 Ruud de Rooij <ruud@debian.org>
8 Modifications for lsh, integrated testing
9 Copyright (C) 1999 J.H.M. Dassen (Ray) <jdassen@wi.LeidenUniv.nl>
11 This file is part of GNU Nettle.
13 GNU Nettle is free software: you can redistribute it and/or
14 modify it under the terms of either:
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
22 * the GNU General Public License as published by the Free
23 Software Foundation; either version 2 of the License, or (at your
24 option) any later version.
26 or both in parallel, as here.
28 GNU Nettle is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 General Public License for more details.
33 You should have received copies of the GNU General Public License and
34 the GNU Lesser General Public License along with this program. If
35 not, see http://www.gnu.org/licenses/.
49 /* Bitwise rotations on 32-bit words. These are defined as macros that
50 * evaluate their argument twice, so do not apply to any expressions with
54 #define rol1(x) (((x) << 1) | (((x) & 0x80000000) >> 31))
55 #define rol8(x) (((x) << 8) | (((x) & 0xFF000000) >> 24))
56 #define rol9(x) (((x) << 9) | (((x) & 0xFF800000) >> 23))
57 #define ror1(x) (((x) >> 1) | (((x) & 0x00000001) << 31))
59 /* ------------------------------------------------------------------------- */
61 /* The permutations q0 and q1. These are fixed permutations on 8-bit values.
62 * The permutations have been computed using the program twofish-data,
63 * which is distributed along with this file.
66 static const uint8_t q0[256] = {
67 0xA9,0x67,0xB3,0xE8,0x04,0xFD,0xA3,0x76,
68 0x9A,0x92,0x80,0x78,0xE4,0xDD,0xD1,0x38,
69 0x0D,0xC6,0x35,0x98,0x18,0xF7,0xEC,0x6C,
70 0x43,0x75,0x37,0x26,0xFA,0x13,0x94,0x48,
71 0xF2,0xD0,0x8B,0x30,0x84,0x54,0xDF,0x23,
72 0x19,0x5B,0x3D,0x59,0xF3,0xAE,0xA2,0x82,
73 0x63,0x01,0x83,0x2E,0xD9,0x51,0x9B,0x7C,
74 0xA6,0xEB,0xA5,0xBE,0x16,0x0C,0xE3,0x61,
75 0xC0,0x8C,0x3A,0xF5,0x73,0x2C,0x25,0x0B,
76 0xBB,0x4E,0x89,0x6B,0x53,0x6A,0xB4,0xF1,
77 0xE1,0xE6,0xBD,0x45,0xE2,0xF4,0xB6,0x66,
78 0xCC,0x95,0x03,0x56,0xD4,0x1C,0x1E,0xD7,
79 0xFB,0xC3,0x8E,0xB5,0xE9,0xCF,0xBF,0xBA,
80 0xEA,0x77,0x39,0xAF,0x33,0xC9,0x62,0x71,
81 0x81,0x79,0x09,0xAD,0x24,0xCD,0xF9,0xD8,
82 0xE5,0xC5,0xB9,0x4D,0x44,0x08,0x86,0xE7,
83 0xA1,0x1D,0xAA,0xED,0x06,0x70,0xB2,0xD2,
84 0x41,0x7B,0xA0,0x11,0x31,0xC2,0x27,0x90,
85 0x20,0xF6,0x60,0xFF,0x96,0x5C,0xB1,0xAB,
86 0x9E,0x9C,0x52,0x1B,0x5F,0x93,0x0A,0xEF,
87 0x91,0x85,0x49,0xEE,0x2D,0x4F,0x8F,0x3B,
88 0x47,0x87,0x6D,0x46,0xD6,0x3E,0x69,0x64,
89 0x2A,0xCE,0xCB,0x2F,0xFC,0x97,0x05,0x7A,
90 0xAC,0x7F,0xD5,0x1A,0x4B,0x0E,0xA7,0x5A,
91 0x28,0x14,0x3F,0x29,0x88,0x3C,0x4C,0x02,
92 0xB8,0xDA,0xB0,0x17,0x55,0x1F,0x8A,0x7D,
93 0x57,0xC7,0x8D,0x74,0xB7,0xC4,0x9F,0x72,
94 0x7E,0x15,0x22,0x12,0x58,0x07,0x99,0x34,
95 0x6E,0x50,0xDE,0x68,0x65,0xBC,0xDB,0xF8,
96 0xC8,0xA8,0x2B,0x40,0xDC,0xFE,0x32,0xA4,
97 0xCA,0x10,0x21,0xF0,0xD3,0x5D,0x0F,0x00,
98 0x6F,0x9D,0x36,0x42,0x4A,0x5E,0xC1,0xE0,
101 static const uint8_t q1[256] = {
102 0x75,0xF3,0xC6,0xF4,0xDB,0x7B,0xFB,0xC8,
103 0x4A,0xD3,0xE6,0x6B,0x45,0x7D,0xE8,0x4B,
104 0xD6,0x32,0xD8,0xFD,0x37,0x71,0xF1,0xE1,
105 0x30,0x0F,0xF8,0x1B,0x87,0xFA,0x06,0x3F,
106 0x5E,0xBA,0xAE,0x5B,0x8A,0x00,0xBC,0x9D,
107 0x6D,0xC1,0xB1,0x0E,0x80,0x5D,0xD2,0xD5,
108 0xA0,0x84,0x07,0x14,0xB5,0x90,0x2C,0xA3,
109 0xB2,0x73,0x4C,0x54,0x92,0x74,0x36,0x51,
110 0x38,0xB0,0xBD,0x5A,0xFC,0x60,0x62,0x96,
111 0x6C,0x42,0xF7,0x10,0x7C,0x28,0x27,0x8C,
112 0x13,0x95,0x9C,0xC7,0x24,0x46,0x3B,0x70,
113 0xCA,0xE3,0x85,0xCB,0x11,0xD0,0x93,0xB8,
114 0xA6,0x83,0x20,0xFF,0x9F,0x77,0xC3,0xCC,
115 0x03,0x6F,0x08,0xBF,0x40,0xE7,0x2B,0xE2,
116 0x79,0x0C,0xAA,0x82,0x41,0x3A,0xEA,0xB9,
117 0xE4,0x9A,0xA4,0x97,0x7E,0xDA,0x7A,0x17,
118 0x66,0x94,0xA1,0x1D,0x3D,0xF0,0xDE,0xB3,
119 0x0B,0x72,0xA7,0x1C,0xEF,0xD1,0x53,0x3E,
120 0x8F,0x33,0x26,0x5F,0xEC,0x76,0x2A,0x49,
121 0x81,0x88,0xEE,0x21,0xC4,0x1A,0xEB,0xD9,
122 0xC5,0x39,0x99,0xCD,0xAD,0x31,0x8B,0x01,
123 0x18,0x23,0xDD,0x1F,0x4E,0x2D,0xF9,0x48,
124 0x4F,0xF2,0x65,0x8E,0x78,0x5C,0x58,0x19,
125 0x8D,0xE5,0x98,0x57,0x67,0x7F,0x05,0x64,
126 0xAF,0x63,0xB6,0xFE,0xF5,0xB7,0x3C,0xA5,
127 0xCE,0xE9,0x68,0x44,0xE0,0x4D,0x43,0x69,
128 0x29,0x2E,0xAC,0x15,0x59,0xA8,0x0A,0x9E,
129 0x6E,0x47,0xDF,0x34,0x35,0x6A,0xCF,0xDC,
130 0x22,0xC9,0xC0,0x9B,0x89,0xD4,0xED,0xAB,
131 0x12,0xA2,0x0D,0x52,0xBB,0x02,0x2F,0xA9,
132 0xD7,0x61,0x1E,0xB4,0x50,0x04,0xF6,0xC2,
133 0x16,0x25,0x86,0x56,0x55,0x09,0xBE,0x91,
136 /* ------------------------------------------------------------------------- */
138 /* uint8_t gf_multiply(uint8_t p, uint8_t a, uint8_t b)
140 * Multiplication in GF(2^8).
142 * This function multiplies a times b in the Galois Field GF(2^8) with
143 * primitive polynomial p.
144 * The representation of the polynomials a, b, and p uses bits with
145 * values 2^i to represent the terms x^i. The polynomial p contains an
148 * Note that addition and subtraction in GF(2^8) is simply the XOR
153 gf_multiply(uint8_t p, uint8_t a, uint8_t b)
159 if (a & 1) result ^= shift;
162 if (shift & 0x100) shift ^= p;
167 /* ------------------------------------------------------------------------- */
169 /* The matrix RS as specified in section 4.3 the twofish paper. */
171 static const uint8_t rs_matrix[4][8] = {
172 { 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E },
173 { 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5 },
174 { 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19 },
175 { 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };
177 /* uint32_t compute_s(uint32_t m1, uint32_t m2);
179 * Computes the value RS * M, where M is a byte vector composed of the
180 * bytes of m1 and m2. Arithmetic is done in GF(2^8) with primitive
181 * polynomial x^8 + x^6 + x^3 + x^2 + 1.
183 * This function is used to compute the sub-keys S which are in turn used
184 * to generate the S-boxes.
188 compute_s(uint32_t m1, uint32_t m2)
192 for (i = 0; i < 4; i++)
193 s |= (( gf_multiply(0x4D, m1, rs_matrix[i][0])
194 ^ gf_multiply(0x4D, m1 >> 8, rs_matrix[i][1])
195 ^ gf_multiply(0x4D, m1 >> 16, rs_matrix[i][2])
196 ^ gf_multiply(0x4D, m1 >> 24, rs_matrix[i][3])
197 ^ gf_multiply(0x4D, m2, rs_matrix[i][4])
198 ^ gf_multiply(0x4D, m2 >> 8, rs_matrix[i][5])
199 ^ gf_multiply(0x4D, m2 >> 16, rs_matrix[i][6])
200 ^ gf_multiply(0x4D, m2 >> 24, rs_matrix[i][7])) << (i*8));
204 /* ------------------------------------------------------------------------- */
206 /* This table describes which q S-boxes are used for each byte in each stage
207 * of the function h, cf. figure 2 of the twofish paper.
210 static const uint8_t * const q_table[4][5] =
211 { { q1, q1, q0, q0, q1 },
212 { q0, q1, q1, q0, q0 },
213 { q0, q0, q0, q1, q1 },
214 { q1, q0, q1, q1, q0 } };
216 /* The matrix MDS as specified in section 4.3.2 of the twofish paper. */
218 static const uint8_t mds_matrix[4][4] = { { 0x01, 0xEF, 0x5B, 0x5B },
219 { 0x5B, 0xEF, 0xEF, 0x01 },
220 { 0xEF, 0x5B, 0x01, 0xEF },
221 { 0xEF, 0x01, 0xEF, 0x5B } };
223 /* uint32_t h_uint8_t(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3);
225 * Perform the h function (section 4.3.2) on one byte. It consists of
226 * repeated applications of the q permutation, followed by a XOR with
227 * part of a sub-key. Finally, the value is multiplied by one column of
228 * the MDS matrix. To obtain the result for a full word, the results of
229 * h for the individual bytes are XORed.
231 * k is the key size (/ 64 bits), i is the byte number (0 = LSB), x is the
232 * actual byte to apply the function to; l0, l1, l2, and l3 are the
233 * appropriate bytes from the subkey. Note that only l0..l(k-1) are used.
237 h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3)
239 uint8_t y = q_table[i][4][l0 ^
241 q_table[i][2][k == 2 ? x : l2 ^
242 q_table[i][1][k == 3 ? x : l3 ^ q_table[i][0][x]]]]];
244 return ( ((uint32_t)gf_multiply(0x69, mds_matrix[0][i], y))
245 | ((uint32_t)gf_multiply(0x69, mds_matrix[1][i], y) << 8)
246 | ((uint32_t)gf_multiply(0x69, mds_matrix[2][i], y) << 16)
247 | ((uint32_t)gf_multiply(0x69, mds_matrix[3][i], y) << 24) );
250 /* uint32_t h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3);
252 * Perform the function h on a word. See the description of h_byte() above.
256 h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3)
258 return ( h_byte(k, 0, x, l0, l1, l2, l3)
259 ^ h_byte(k, 1, x, l0 >> 8, l1 >> 8, l2 >> 8, l3 >> 8)
260 ^ h_byte(k, 2, x, l0 >> 16, l1 >> 16, l2 >> 16, l3 >> 16)
261 ^ h_byte(k, 3, x, l0 >> 24, l1 >> 24, l2 >> 24, l3 >> 24) );
265 /* ------------------------------------------------------------------------- */
269 /* Structure which contains the tables containing the subkeys and the
270 * key-dependent s-boxes.
274 /* Set up internal tables required for twofish encryption and decryption.
276 * The key size is specified in bytes. Key sizes up to 32 bytes are
277 * supported. Larger key sizes are silently truncated.
281 twofish_set_key(struct twofish_ctx *context,
282 size_t keysize, const uint8_t *key)
284 uint8_t key_copy[32];
285 uint32_t m[8], s[4], t;
288 /* Extend key as necessary */
290 assert(keysize <= 32);
292 /* We do a little more copying than necessary, but that doesn't
294 memset(key_copy, 0, 32);
295 memcpy(key_copy, key, keysize);
297 for (i = 0; i<8; i++)
298 m[i] = LE_READ_UINT32(key_copy + i*4);
302 else if (keysize <= 24)
307 /* Compute sub-keys */
309 for (i = 0; i < 20; i++)
311 t = h(k, 2*i+1, m[1], m[3], m[5], m[7]);
313 t += (context->keys[2*i] =
314 t + h(k, 2*i, m[0], m[2], m[4], m[6]));
316 context->keys[2*i+1] = t;
319 /* Compute key-dependent S-boxes */
321 for (i = 0; i < k; i++)
322 s[k-1-i] = compute_s(m[2*i], m[2*i+1]);
324 for (i = 0; i < 4; i++)
325 for (j = 0; j < 256; j++)
326 context->s_box[i][j] = h_byte(k, i, j,
334 twofish128_set_key(struct twofish_ctx *context, const uint8_t *key)
336 twofish_set_key (context, TWOFISH128_KEY_SIZE, key);
339 twofish192_set_key(struct twofish_ctx *context, const uint8_t *key)
341 twofish_set_key (context, TWOFISH192_KEY_SIZE, key);
344 twofish256_set_key(struct twofish_ctx *context, const uint8_t *key)
346 twofish_set_key (context, TWOFISH256_KEY_SIZE, key);
349 /* Encrypt blocks of 16 bytes of data with the twofish algorithm.
351 * Before this function can be used, twofish_set_key() must be used in order to
352 * set up various tables required for the encryption algorithm.
354 * This function always encrypts 16 bytes of plaintext to 16 bytes of
355 * ciphertext. The memory areas of the plaintext and the ciphertext can
360 twofish_encrypt(const struct twofish_ctx *context,
363 const uint8_t *plaintext)
365 const uint32_t * keys = context->keys;
366 const uint32_t (*s_box)[256] = context->s_box;
368 assert( !(length % TWOFISH_BLOCK_SIZE) );
369 for ( ; length; length -= TWOFISH_BLOCK_SIZE)
372 uint32_t r0, r1, r2, r3, t0, t1;
375 for (i = 0; i<4; i++, plaintext += 4)
376 words[i] = LE_READ_UINT32(plaintext);
378 r0 = words[0] ^ keys[0];
379 r1 = words[1] ^ keys[1];
380 r2 = words[2] ^ keys[2];
381 r3 = words[3] ^ keys[3];
383 for (i = 0; i < 8; i++) {
384 t1 = ( s_box[1][r1 & 0xFF]
385 ^ s_box[2][(r1 >> 8) & 0xFF]
386 ^ s_box[3][(r1 >> 16) & 0xFF]
387 ^ s_box[0][(r1 >> 24) & 0xFF]);
388 t0 = ( s_box[0][r0 & 0xFF]
389 ^ s_box[1][(r0 >> 8) & 0xFF]
390 ^ s_box[2][(r0 >> 16) & 0xFF]
391 ^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
392 r3 = (t1 + t0 + keys[4*i+9]) ^ rol1(r3);
393 r2 = (t0 + keys[4*i+8]) ^ r2;
396 t1 = ( s_box[1][r3 & 0xFF]
397 ^ s_box[2][(r3 >> 8) & 0xFF]
398 ^ s_box[3][(r3 >> 16) & 0xFF]
399 ^ s_box[0][(r3 >> 24) & 0xFF]);
400 t0 = ( s_box[0][r2 & 0xFF]
401 ^ s_box[1][(r2 >> 8) & 0xFF]
402 ^ s_box[2][(r2 >> 16) & 0xFF]
403 ^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
404 r1 = (t1 + t0 + keys[4*i+11]) ^ rol1(r1);
405 r0 = (t0 + keys[4*i+10]) ^ r0;
409 words[0] = r2 ^ keys[4];
410 words[1] = r3 ^ keys[5];
411 words[2] = r0 ^ keys[6];
412 words[3] = r1 ^ keys[7];
414 for (i = 0; i<4; i++, ciphertext += 4)
415 LE_WRITE_UINT32(ciphertext, words[i]);
419 /* Decrypt blocks of 16 bytes of data with the twofish algorithm.
421 * Before this function can be used, twofish_set_key() must be used in order to
422 * set up various tables required for the decryption algorithm.
424 * This function always decrypts 16 bytes of ciphertext to 16 bytes of
425 * plaintext. The memory areas of the plaintext and the ciphertext can
430 twofish_decrypt(const struct twofish_ctx *context,
433 const uint8_t *ciphertext)
436 const uint32_t *keys = context->keys;
437 const uint32_t (*s_box)[256] = context->s_box;
439 assert( !(length % TWOFISH_BLOCK_SIZE) );
440 for ( ; length; length -= TWOFISH_BLOCK_SIZE)
443 uint32_t r0, r1, r2, r3, t0, t1;
446 for (i = 0; i<4; i++, ciphertext += 4)
447 words[i] = LE_READ_UINT32(ciphertext);
449 r0 = words[2] ^ keys[6];
450 r1 = words[3] ^ keys[7];
451 r2 = words[0] ^ keys[4];
452 r3 = words[1] ^ keys[5];
454 for (i = 0; i < 8; i++) {
455 t1 = ( s_box[1][r3 & 0xFF]
456 ^ s_box[2][(r3 >> 8) & 0xFF]
457 ^ s_box[3][(r3 >> 16) & 0xFF]
458 ^ s_box[0][(r3 >> 24) & 0xFF]);
459 t0 = ( s_box[0][r2 & 0xFF]
460 ^ s_box[1][(r2 >> 8) & 0xFF]
461 ^ s_box[2][(r2 >> 16) & 0xFF]
462 ^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
463 r1 = (t1 + t0 + keys[39-4*i]) ^ r1;
465 r0 = (t0 + keys[38-4*i]) ^ rol1(r0);
467 t1 = ( s_box[1][r1 & 0xFF]
468 ^ s_box[2][(r1 >> 8) & 0xFF]
469 ^ s_box[3][(r1 >> 16) & 0xFF]
470 ^ s_box[0][(r1 >> 24) & 0xFF]);
471 t0 = ( s_box[0][r0 & 0xFF]
472 ^ s_box[1][(r0 >> 8) & 0xFF]
473 ^ s_box[2][(r0 >> 16) & 0xFF]
474 ^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
475 r3 = (t1 + t0 + keys[37-4*i]) ^ r3;
477 r2 = (t0 + keys[36-4*i]) ^ rol1(r2);
480 words[0] = r0 ^ keys[0];
481 words[1] = r1 ^ keys[1];
482 words[2] = r2 ^ keys[2];
483 words[3] = r3 ^ keys[3];
485 for (i = 0; i<4; i++, plaintext += 4)
486 LE_WRITE_UINT32(plaintext, words[i]);