twofish.c

   1 /* twofish.c
   2  *
   3  * The twofish block cipher.
   4  */
   5
   6 /* twofish - An implementation of the twofish cipher.
   7  * Copyright (C) 1999 Ruud de Rooij <ruud@debian.org>
   8  *
   9  * Modifications for lsh, integrated testing
  10  * Copyright (C) 1999 J.H.M. Dassen (Ray) <jdassen@wi.LeidenUniv.nl>
  11  *
  12  * Integrated with the nettle library,
  13  * Copyright (C) 2001 Niels Möller
  14  */
  15
  16 /* nettle, low-level cryptographics library
  17  *
  18  * The nettle library is free software; you can redistribute it and/or modify
  19  * it under the terms of the GNU Lesser General Public License as published by
  20  * the Free Software Foundation; either version 2.1 of the License, or (at your
  21  * option) any later version.
  22  *
  23  * The nettle Library is distributed in the hope that it will be useful, but
  24  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  25  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  26  * License for more details.
  27  *
  28  * You should have received a copy of the GNU Lesser General Public License
  29  * along with the nettle library; see the file COPYING.LIB.  If not, write to
  30  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  31  * MA 02111-1301, USA.
  32  */
  33
  34 #if HAVE_CONFIG_H
  35 # include "config.h"
  36 #endif
  37
  38 #include <assert.h>
  39 #include <string.h>
  40
  41 #include "twofish.h"
  42
  43 #include "macros.h"
  44
  45 /* Bitwise rotations on 32-bit words.  These are defined as macros that
  46  * evaluate their argument twice, so do not apply to any expressions with
  47  * side effects.
  48  */
  49
  50 #define rol1(x) (((x) << 1) | (((x) & 0x80000000) >> 31))
  51 #define rol8(x) (((x) << 8) | (((x) & 0xFF000000) >> 24))
  52 #define rol9(x) (((x) << 9) | (((x) & 0xFF800000) >> 23))
  53 #define ror1(x) (((x) >> 1) | (((x) & 0x00000001) << 31))
  54
  55 /* ------------------------------------------------------------------------- */
  56
  57 /* The permutations q0 and q1.  These are fixed permutations on 8-bit values.
  58  * The permutations have been computed using the program twofish-data,
  59  * which is distributed along with this file.
  60  */
  61
  62 static const uint8_t q0[256] = {
  63   0xA9,0x67,0xB3,0xE8,0x04,0xFD,0xA3,0x76,
  64   0x9A,0x92,0x80,0x78,0xE4,0xDD,0xD1,0x38,
  65   0x0D,0xC6,0x35,0x98,0x18,0xF7,0xEC,0x6C,
  66   0x43,0x75,0x37,0x26,0xFA,0x13,0x94,0x48,
  67   0xF2,0xD0,0x8B,0x30,0x84,0x54,0xDF,0x23,
  68   0x19,0x5B,0x3D,0x59,0xF3,0xAE,0xA2,0x82,
  69   0x63,0x01,0x83,0x2E,0xD9,0x51,0x9B,0x7C,
  70   0xA6,0xEB,0xA5,0xBE,0x16,0x0C,0xE3,0x61,
  71   0xC0,0x8C,0x3A,0xF5,0x73,0x2C,0x25,0x0B,
  72   0xBB,0x4E,0x89,0x6B,0x53,0x6A,0xB4,0xF1,
  73   0xE1,0xE6,0xBD,0x45,0xE2,0xF4,0xB6,0x66,
  74   0xCC,0x95,0x03,0x56,0xD4,0x1C,0x1E,0xD7,
  75   0xFB,0xC3,0x8E,0xB5,0xE9,0xCF,0xBF,0xBA,
  76   0xEA,0x77,0x39,0xAF,0x33,0xC9,0x62,0x71,
  77   0x81,0x79,0x09,0xAD,0x24,0xCD,0xF9,0xD8,
  78   0xE5,0xC5,0xB9,0x4D,0x44,0x08,0x86,0xE7,
  79   0xA1,0x1D,0xAA,0xED,0x06,0x70,0xB2,0xD2,
  80   0x41,0x7B,0xA0,0x11,0x31,0xC2,0x27,0x90,
  81   0x20,0xF6,0x60,0xFF,0x96,0x5C,0xB1,0xAB,
  82   0x9E,0x9C,0x52,0x1B,0x5F,0x93,0x0A,0xEF,
  83   0x91,0x85,0x49,0xEE,0x2D,0x4F,0x8F,0x3B,
  84   0x47,0x87,0x6D,0x46,0xD6,0x3E,0x69,0x64,
  85   0x2A,0xCE,0xCB,0x2F,0xFC,0x97,0x05,0x7A,
  86   0xAC,0x7F,0xD5,0x1A,0x4B,0x0E,0xA7,0x5A,
  87   0x28,0x14,0x3F,0x29,0x88,0x3C,0x4C,0x02,
  88   0xB8,0xDA,0xB0,0x17,0x55,0x1F,0x8A,0x7D,
  89   0x57,0xC7,0x8D,0x74,0xB7,0xC4,0x9F,0x72,
  90   0x7E,0x15,0x22,0x12,0x58,0x07,0x99,0x34,
  91   0x6E,0x50,0xDE,0x68,0x65,0xBC,0xDB,0xF8,
  92   0xC8,0xA8,0x2B,0x40,0xDC,0xFE,0x32,0xA4,
  93   0xCA,0x10,0x21,0xF0,0xD3,0x5D,0x0F,0x00,
  94   0x6F,0x9D,0x36,0x42,0x4A,0x5E,0xC1,0xE0,
  95 };
  96
  97 static const uint8_t q1[256] = {
  98   0x75,0xF3,0xC6,0xF4,0xDB,0x7B,0xFB,0xC8,
  99   0x4A,0xD3,0xE6,0x6B,0x45,0x7D,0xE8,0x4B,
 100   0xD6,0x32,0xD8,0xFD,0x37,0x71,0xF1,0xE1,
 101   0x30,0x0F,0xF8,0x1B,0x87,0xFA,0x06,0x3F,
 102   0x5E,0xBA,0xAE,0x5B,0x8A,0x00,0xBC,0x9D,
 103   0x6D,0xC1,0xB1,0x0E,0x80,0x5D,0xD2,0xD5,
 104   0xA0,0x84,0x07,0x14,0xB5,0x90,0x2C,0xA3,
 105   0xB2,0x73,0x4C,0x54,0x92,0x74,0x36,0x51,
 106   0x38,0xB0,0xBD,0x5A,0xFC,0x60,0x62,0x96,
 107   0x6C,0x42,0xF7,0x10,0x7C,0x28,0x27,0x8C,
 108   0x13,0x95,0x9C,0xC7,0x24,0x46,0x3B,0x70,
 109   0xCA,0xE3,0x85,0xCB,0x11,0xD0,0x93,0xB8,
 110   0xA6,0x83,0x20,0xFF,0x9F,0x77,0xC3,0xCC,
 111   0x03,0x6F,0x08,0xBF,0x40,0xE7,0x2B,0xE2,
 112   0x79,0x0C,0xAA,0x82,0x41,0x3A,0xEA,0xB9,
 113   0xE4,0x9A,0xA4,0x97,0x7E,0xDA,0x7A,0x17,
 114   0x66,0x94,0xA1,0x1D,0x3D,0xF0,0xDE,0xB3,
 115   0x0B,0x72,0xA7,0x1C,0xEF,0xD1,0x53,0x3E,
 116   0x8F,0x33,0x26,0x5F,0xEC,0x76,0x2A,0x49,
 117   0x81,0x88,0xEE,0x21,0xC4,0x1A,0xEB,0xD9,
 118   0xC5,0x39,0x99,0xCD,0xAD,0x31,0x8B,0x01,
 119   0x18,0x23,0xDD,0x1F,0x4E,0x2D,0xF9,0x48,
 120   0x4F,0xF2,0x65,0x8E,0x78,0x5C,0x58,0x19,
 121   0x8D,0xE5,0x98,0x57,0x67,0x7F,0x05,0x64,
 122   0xAF,0x63,0xB6,0xFE,0xF5,0xB7,0x3C,0xA5,
 123   0xCE,0xE9,0x68,0x44,0xE0,0x4D,0x43,0x69,
 124   0x29,0x2E,0xAC,0x15,0x59,0xA8,0x0A,0x9E,
 125   0x6E,0x47,0xDF,0x34,0x35,0x6A,0xCF,0xDC,
 126   0x22,0xC9,0xC0,0x9B,0x89,0xD4,0xED,0xAB,
 127   0x12,0xA2,0x0D,0x52,0xBB,0x02,0x2F,0xA9,
 128   0xD7,0x61,0x1E,0xB4,0x50,0x04,0xF6,0xC2,
 129   0x16,0x25,0x86,0x56,0x55,0x09,0xBE,0x91,
 130 };
 131
 132 /* ------------------------------------------------------------------------- */
 133
 134 /* uint8_t gf_multiply(uint8_t p, uint8_t a, uint8_t b)
 135  *
 136  * Multiplication in GF(2^8).
 137  *
 138  * This function multiplies a times b in the Galois Field GF(2^8) with
 139  * primitive polynomial p.
 140  * The representation of the polynomials a, b, and p uses bits with
 141  * values 2^i to represent the terms x^i.  The polynomial p contains an
 142  * implicit term x^8.
 143  *
 144  * Note that addition and subtraction in GF(2^8) is simply the XOR
 145  * operation.
 146  */
 147
 148 static uint8_t
 149 gf_multiply(uint8_t p, uint8_t a, uint8_t b)
 150 {
 151   uint32_t shift  = b;
 152   uint8_t result = 0;
 153   while (a)
 154     {
 155       if (a & 1) result ^= shift;
 156       a = a >> 1;
 157       shift = shift << 1;
 158       if (shift & 0x100) shift ^= p;
 159     }
 160   return result;
 161 }
 162
 163 /* ------------------------------------------------------------------------- */
 164
 165 /* The matrix RS as specified in section 4.3 the twofish paper. */
 166
 167 static const uint8_t rs_matrix[4][8] = {
 168     { 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E },
 169     { 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5 },
 170     { 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19 },
 171     { 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03 } };
 172
 173 /* uint32_t compute_s(uint32_t m1, uint32_t m2);
 174  *
 175  * Computes the value RS * M, where M is a byte vector composed of the
 176  * bytes of m1 and m2.  Arithmetic is done in GF(2^8) with primitive
 177  * polynomial x^8 + x^6 + x^3 + x^2 + 1.
 178  *
 179  * This function is used to compute the sub-keys S which are in turn used
 180  * to generate the S-boxes.
 181  */
 182
 183 static uint32_t
 184 compute_s(uint32_t m1, uint32_t m2)
 185 {
 186   uint32_t s = 0;
 187   int i;
 188   for (i = 0; i < 4; i++)
 189     s |=  ((  gf_multiply(0x4D, m1,       rs_matrix[i][0])
 190             ^ gf_multiply(0x4D, m1 >> 8,  rs_matrix[i][1])
 191             ^ gf_multiply(0x4D, m1 >> 16, rs_matrix[i][2])
 192             ^ gf_multiply(0x4D, m1 >> 24, rs_matrix[i][3])
 193             ^ gf_multiply(0x4D, m2,       rs_matrix[i][4])
 194             ^ gf_multiply(0x4D, m2 >> 8,  rs_matrix[i][5])
 195             ^ gf_multiply(0x4D, m2 >> 16, rs_matrix[i][6])
 196             ^ gf_multiply(0x4D, m2 >> 24, rs_matrix[i][7])) << (i*8));
 197   return s;
 198 }
 199
 200 /* ------------------------------------------------------------------------- */
 201
 202 /* This table describes which q S-boxes are used for each byte in each stage
 203  * of the function h, cf. figure 2 of the twofish paper.
 204  */
 205
 206 static const uint8_t * const q_table[4][5] =
 207   { { q1, q1, q0, q0, q1 },
 208     { q0, q1, q1, q0, q0 },
 209     { q0, q0, q0, q1, q1 },
 210     { q1, q0, q1, q1, q0 } };
 211
 212 /* The matrix MDS as specified in section 4.3.2 of the twofish paper. */
 213
 214 static const uint8_t mds_matrix[4][4] = { { 0x01, 0xEF, 0x5B, 0x5B },
 215                                  { 0x5B, 0xEF, 0xEF, 0x01 },
 216                                  { 0xEF, 0x5B, 0x01, 0xEF },
 217                                  { 0xEF, 0x01, 0xEF, 0x5B } };
 218
 219 /* uint32_t h_uint8_t(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3);
 220  *
 221  * Perform the h function (section 4.3.2) on one byte.  It consists of
 222  * repeated applications of the q permutation, followed by a XOR with
 223  * part of a sub-key.  Finally, the value is multiplied by one column of
 224  * the MDS matrix.  To obtain the result for a full word, the results of
 225  * h for the individual bytes are XORed.
 226  *
 227  * k is the key size (/ 64 bits), i is the byte number (0 = LSB), x is the
 228  * actual byte to apply the function to; l0, l1, l2, and l3 are the
 229  * appropriate bytes from the subkey.  Note that only l0..l(k-1) are used.
 230  */
 231
 232 static uint32_t
 233 h_byte(int k, int i, uint8_t x, uint8_t l0, uint8_t l1, uint8_t l2, uint8_t l3)
 234 {
 235   uint8_t y = q_table[i][4][l0 ^
 236             q_table[i][3][l1 ^
 237               q_table[i][2][k == 2 ? x : l2 ^
 238                 q_table[i][1][k == 3 ? x : l3 ^ q_table[i][0][x]]]]];
 239
 240   return ( ((uint32_t)gf_multiply(0x69, mds_matrix[0][i], y))
 241            | ((uint32_t)gf_multiply(0x69, mds_matrix[1][i], y) << 8)
 242            | ((uint32_t)gf_multiply(0x69, mds_matrix[2][i], y) << 16)
 243            | ((uint32_t)gf_multiply(0x69, mds_matrix[3][i], y) << 24) );
 244 }
 245
 246 /* uint32_t h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3);
 247  *
 248  * Perform the function h on a word.  See the description of h_byte() above.
 249  */
 250
 251 static uint32_t
 252 h(int k, uint8_t x, uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3)
 253 {
 254   return (  h_byte(k, 0, x, l0,       l1,       l2,       l3)
 255           ^ h_byte(k, 1, x, l0 >> 8,  l1 >> 8,  l2 >> 8,  l3 >> 8)
 256           ^ h_byte(k, 2, x, l0 >> 16, l1 >> 16, l2 >> 16, l3 >> 16)
 257           ^ h_byte(k, 3, x, l0 >> 24, l1 >> 24, l2 >> 24, l3 >> 24) );
 258 }
 259
 260
 261 /* ------------------------------------------------------------------------- */
 262
 263 /* API */
 264
 265 /* Structure which contains the tables containing the subkeys and the
 266  * key-dependent s-boxes.
 267  */
 268
 269
 270 /* Set up internal tables required for twofish encryption and decryption.
 271  *
 272  * The key size is specified in bytes.  Key sizes up to 32 bytes are
 273  * supported.  Larger key sizes are silently truncated.
 274  */
 275
 276 void
 277 twofish_set_key(struct twofish_ctx *context,
 278                 unsigned keysize, const uint8_t *key)
 279 {
 280   uint8_t key_copy[32];
 281   uint32_t m[8], s[4], t;
 282   int i, j, k;
 283
 284   /* Extend key as necessary */
 285
 286   assert(keysize <= 32);
 287
 288   /* We do a little more copying than necessary, but that doesn't
 289    * really matter. */
 290   memset(key_copy, 0, 32);
 291   memcpy(key_copy, key, keysize);
 292
 293   for (i = 0; i<8; i++)
 294     m[i] = LE_READ_UINT32(key_copy + i*4);
 295
 296   if (keysize <= 16)
 297     k = 2;
 298   else if (keysize <= 24)
 299     k = 3;
 300   else
 301     k = 4;
 302
 303   /* Compute sub-keys */
 304
 305   for (i = 0; i < 20; i++)
 306     {
 307       t = h(k, 2*i+1, m[1], m[3], m[5], m[7]);
 308       t = rol8(t);
 309       t += (context->keys[2*i] =
 310             t + h(k, 2*i, m[0], m[2], m[4], m[6]));
 311       t = rol9(t);
 312       context->keys[2*i+1] = t;
 313     }
 314
 315   /* Compute key-dependent S-boxes */
 316
 317   for (i = 0; i < k; i++)
 318     s[k-1-i] = compute_s(m[2*i], m[2*i+1]);
 319
 320   for (i = 0; i < 4; i++)
 321     for (j = 0; j < 256; j++)
 322       context->s_box[i][j] = h_byte(k, i, j,
 323                                     s[0] >> (i*8),
 324                                     s[1] >> (i*8),
 325                                     s[2] >> (i*8),
 326                                     s[3] >> (i*8));
 327 }
 328
 329 /* Encrypt blocks of 16 bytes of data with the twofish algorithm.
 330  *
 331  * Before this function can be used, twofish_set_key() must be used in order to
 332  * set up various tables required for the encryption algorithm.
 333  *
 334  * This function always encrypts 16 bytes of plaintext to 16 bytes of
 335  * ciphertext.  The memory areas of the plaintext and the ciphertext can
 336  * overlap.
 337  */
 338
 339 void
 340 twofish_encrypt(const struct twofish_ctx *context,
 341                 unsigned length,
 342                 uint8_t *ciphertext,
 343                 const uint8_t *plaintext)
 344 {
 345   const uint32_t * keys        = context->keys;
 346   const uint32_t (*s_box)[256] = context->s_box;
 347
 348   assert( !(length % TWOFISH_BLOCK_SIZE) );
 349   for ( ; length; length -= TWOFISH_BLOCK_SIZE)
 350     {
 351       uint32_t words[4];
 352       uint32_t r0, r1, r2, r3, t0, t1;
 353       int i;
 354
 355       for (i = 0; i<4; i++, plaintext += 4)
 356         words[i] = LE_READ_UINT32(plaintext);
 357
 358       r0 = words[0] ^ keys[0];
 359       r1 = words[1] ^ keys[1];
 360       r2 = words[2] ^ keys[2];
 361       r3 = words[3] ^ keys[3];
 362
 363       for (i = 0; i < 8; i++) {
 364         t1 = (  s_box[1][r1 & 0xFF]
 365                 ^ s_box[2][(r1 >> 8) & 0xFF]
 366                 ^ s_box[3][(r1 >> 16) & 0xFF]
 367                 ^ s_box[0][(r1 >> 24) & 0xFF]);
 368         t0 = (  s_box[0][r0 & 0xFF]
 369                 ^ s_box[1][(r0 >> 8) & 0xFF]
 370                 ^ s_box[2][(r0 >> 16) & 0xFF]
 371                 ^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
 372         r3 = (t1 + t0 + keys[4*i+9]) ^ rol1(r3);
 373         r2 = (t0 + keys[4*i+8]) ^ r2;
 374         r2 = ror1(r2);
 375
 376         t1 = (  s_box[1][r3 & 0xFF]
 377                 ^ s_box[2][(r3 >> 8) & 0xFF]
 378                 ^ s_box[3][(r3 >> 16) & 0xFF]
 379                 ^ s_box[0][(r3 >> 24) & 0xFF]);
 380         t0 = (  s_box[0][r2 & 0xFF]
 381                 ^ s_box[1][(r2 >> 8) & 0xFF]
 382                 ^ s_box[2][(r2 >> 16) & 0xFF]
 383                 ^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
 384         r1 = (t1 + t0 + keys[4*i+11]) ^ rol1(r1);
 385         r0 = (t0 + keys[4*i+10]) ^ r0;
 386         r0 = ror1(r0);
 387       }
 388
 389       words[0] = r2 ^ keys[4];
 390       words[1] = r3 ^ keys[5];
 391       words[2] = r0 ^ keys[6];
 392       words[3] = r1 ^ keys[7];
 393
 394       for (i = 0; i<4; i++, ciphertext += 4)
 395         LE_WRITE_UINT32(ciphertext, words[i]);
 396     }
 397 }
 398
 399 /* Decrypt blocks of 16 bytes of data with the twofish algorithm.
 400  *
 401  * Before this function can be used, twofish_set_key() must be used in order to
 402  * set up various tables required for the decryption algorithm.
 403  *
 404  * This function always decrypts 16 bytes of ciphertext to 16 bytes of
 405  * plaintext.  The memory areas of the plaintext and the ciphertext can
 406  * overlap.
 407  */
 408
 409 void
 410 twofish_decrypt(const struct twofish_ctx *context,
 411                 unsigned length,
 412                 uint8_t *plaintext,
 413                 const uint8_t *ciphertext)
 414
 415 {
 416   const uint32_t *keys  = context->keys;
 417   const uint32_t (*s_box)[256] = context->s_box;
 418
 419   assert( !(length % TWOFISH_BLOCK_SIZE) );
 420   for ( ; length; length -= TWOFISH_BLOCK_SIZE)
 421     {
 422       uint32_t words[4];
 423       uint32_t r0, r1, r2, r3, t0, t1;
 424       int i;
 425
 426       for (i = 0; i<4; i++, ciphertext += 4)
 427         words[i] = LE_READ_UINT32(ciphertext);
 428
 429       r0 = words[2] ^ keys[6];
 430       r1 = words[3] ^ keys[7];
 431       r2 = words[0] ^ keys[4];
 432       r3 = words[1] ^ keys[5];
 433
 434       for (i = 0; i < 8; i++) {
 435         t1 = (  s_box[1][r3 & 0xFF]
 436                 ^ s_box[2][(r3 >> 8) & 0xFF]
 437                 ^ s_box[3][(r3 >> 16) & 0xFF]
 438                 ^ s_box[0][(r3 >> 24) & 0xFF]);
 439         t0 = (  s_box[0][r2 & 0xFF]
 440                 ^ s_box[1][(r2 >> 8) & 0xFF]
 441                 ^ s_box[2][(r2 >> 16) & 0xFF]
 442                 ^ s_box[3][(r2 >> 24) & 0xFF]) + t1;
 443         r1 = (t1 + t0 + keys[39-4*i]) ^ r1;
 444         r1 = ror1(r1);
 445         r0 = (t0 + keys[38-4*i]) ^ rol1(r0);
 446
 447         t1 = (  s_box[1][r1 & 0xFF]
 448                 ^ s_box[2][(r1 >> 8) & 0xFF]
 449                 ^ s_box[3][(r1 >> 16) & 0xFF]
 450                 ^ s_box[0][(r1 >> 24) & 0xFF]);
 451         t0 = (  s_box[0][r0 & 0xFF]
 452                 ^ s_box[1][(r0 >> 8) & 0xFF]
 453                 ^ s_box[2][(r0 >> 16) & 0xFF]
 454                 ^ s_box[3][(r0 >> 24) & 0xFF]) + t1;
 455         r3 = (t1 + t0 + keys[37-4*i]) ^ r3;
 456         r3 = ror1(r3);
 457         r2 = (t0 + keys[36-4*i]) ^ rol1(r2);
 458       }
 459
 460       words[0] = r0 ^ keys[0];
 461       words[1] = r1 ^ keys[1];
 462       words[2] = r2 ^ keys[2];
 463       words[3] = r3 ^ keys[3];
 464
 465       for (i = 0; i<4; i++, plaintext += 4)
 466         LE_WRITE_UINT32(plaintext, words[i]);
 467     }
 468 }