/* salsa20.c - Bernstein's Salsa20 cipher
 * Copyright (C) 2012 Simon Josefsson, Niels Möller
 * Copyright (C) 2013 g10 Code GmbH
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * For a description of the algorithm, see:
 * http://cr.yp.to/snuffle/spec.pdf
 * http://cr.yp.to/snuffle/design.pdf
 */
/* The code is based on the code in Nettle
   (git commit id 9d2d8ddaee35b91a4e1a32ae77cba04bea3480e7)
   which in turn is based on
   salsa20-ref.c version 20051118.  */
/* USE_AMD64 indicates whether to compile with AMD64 code.  It is
   cleared first so that only the test below can turn it on. */
#undef USE_AMD64
#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
# define USE_AMD64 1
#endif

/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code.
   It is only considered when ENABLE_NEON_SUPPORT is defined. */
#undef USE_ARM_NEON_ASM
#ifdef ENABLE_NEON_SUPPORT
# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
     && defined(HAVE_GCC_INLINE_ASM_NEON)
#  define USE_ARM_NEON_ASM 1
# endif
#endif /*ENABLE_NEON_SUPPORT*/
/* Key, block and nonce sizes, all counted in bytes. */
#define SALSA20_MIN_KEY_SIZE 16
#define SALSA20_MAX_KEY_SIZE 32
#define SALSA20_BLOCK_SIZE   64
#define SALSA20_IV_SIZE       8

/* Length of the cipher state; it dimensions arrays of u32, so this
   counts 32-bit words rather than bytes. */
#define SALSA20_INPUT_LENGTH 16

/* Round counts.  The standard cipher uses 20 rounds, the reduced
   variant 12.  The core processes two rounds per loop iteration, so
   any value used here must be even. */
#define SALSA20_ROUNDS    20
#define SALSA20R12_ROUNDS 12
73 struct SALSA20_context_s;
75 typedef unsigned int (*salsa20_core_t) (u32 *dst, struct SALSA20_context_s *ctx,
77 typedef void (* salsa20_keysetup_t)(struct SALSA20_context_s *ctx,
78 const byte *key, int keylen);
79 typedef void (* salsa20_ivsetup_t)(struct SALSA20_context_s *ctx,
82 typedef struct SALSA20_context_s
84 /* Indices 1-4 and 11-14 holds the key (two identical copies for the
85 shorter key size), indices 0, 5, 10, 15 are constant, indices 6, 7
86 are the IV, and indices 8, 9 are the block counter:
93 u32 input[SALSA20_INPUT_LENGTH];
94 u32 pad[SALSA20_INPUT_LENGTH];
95 unsigned int unused; /* bytes in the pad. */
96 #ifdef USE_ARM_NEON_ASM
99 salsa20_keysetup_t keysetup;
100 salsa20_ivsetup_t ivsetup;
/* Rotate the 32-bit value X left by N bits.  The right-shift count is
   computed as (-N) & 31 instead of 32 - N so that N == 0 does not turn
   into a shift by the full word width, which would be undefined
   behaviour.  Most callers pass a constant, non-zero N. */
#define ROTL32(n,x) (((x) << (n)) | ((x) >> ((-(n)) & 31)))

/* Local names for the byte-order helpers le_bswap32 and buf_get_le32,
   which are not defined in this part of the file. */
#define LE_SWAP32(v) le_bswap32(v)
#define LE_READ_UINT32(p) buf_get_le32(p)
116 static void salsa20_setiv (void *context, const byte *iv, size_t ivlen);
117 static const char *selftest (void);
121 /* AMD64 assembly implementations of Salsa20. */
122 void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits);
123 void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv);
125 _gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst,
126 size_t len, int rounds);
129 salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
131 _gcry_salsa20_amd64_keysetup(ctx->input, key, keylen * 8);
135 salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv)
137 _gcry_salsa20_amd64_ivsetup(ctx->input, iv);
141 salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
143 memset(dst, 0, SALSA20_BLOCK_SIZE);
144 return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds);
147 #else /* USE_AMD64 */
/* Optional tracing for the C block function.  When the first branch is
   enabled, SALSA20_CORE_DEBUG(i) prints the label I followed by the 16
   words of the caller's local array `pad' to stderr, four per line.
   NOTE(review): the condition line is not visible in this listing; the
   trace is kept disabled here - confirm against the project sources. */
#if 0
# define SALSA20_CORE_DEBUG(i) do {                 \
    unsigned debug_j;                               \
    for (debug_j = 0; debug_j < 16; debug_j++)      \
      {                                             \
        if (debug_j == 0)                           \
          fprintf(stderr, "%2d:", (i));             \
        else if (debug_j % 4 == 0)                  \
          fprintf(stderr, "\n ");                   \
        fprintf(stderr, " %8x", pad[debug_j]);      \
      }                                             \
    fprintf(stderr, "\n");                          \
  } while (0)
#else
# define SALSA20_CORE_DEBUG(i)
#endif
/* Salsa20 quarter round applied in place to four state words.  Every
   step feeds the word updated by the previous step, so the order of
   the four statements must not be changed.  The arguments are
   evaluated more than once and must be plain lvalues.  Wrapped in
   do/while so that an invocation behaves as a single statement. */
#define QROUND(x0, x1, x2, x3)      \
  do {                              \
    x1 ^= ROTL32 ( 7, x0 + x3);     \
    x2 ^= ROTL32 ( 9, x1 + x0);     \
    x3 ^= ROTL32 (13, x2 + x1);     \
    x0 ^= ROTL32 (18, x3 + x2);     \
  } while(0)
177 salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned rounds)
179 u32 pad[SALSA20_INPUT_LENGTH], *src = ctx->input;
182 memcpy (pad, src, sizeof(pad));
183 for (i = 0; i < rounds; i += 2)
185 SALSA20_CORE_DEBUG (i);
186 QROUND (pad[0], pad[4], pad[8], pad[12]);
187 QROUND (pad[5], pad[9], pad[13], pad[1] );
188 QROUND (pad[10], pad[14], pad[2], pad[6] );
189 QROUND (pad[15], pad[3], pad[7], pad[11]);
191 SALSA20_CORE_DEBUG (i+1);
192 QROUND (pad[0], pad[1], pad[2], pad[3] );
193 QROUND (pad[5], pad[6], pad[7], pad[4] );
194 QROUND (pad[10], pad[11], pad[8], pad[9] );
195 QROUND (pad[15], pad[12], pad[13], pad[14]);
197 SALSA20_CORE_DEBUG (i);
199 for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
201 u32 t = pad[i] + src[i];
202 dst[i] = LE_SWAP32 (t);
205 /* Update counter. */
210 return ( 3*sizeof (void*) \
213 + sizeof (unsigned int) \
217 #undef SALSA20_CORE_DEBUG
220 salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
222 /* These constants are the little endian encoding of the string
223 "expand 32-byte k". For the 128 bit variant, the "32" in that
224 string will be fixed up to "16". */
225 ctx->input[0] = 0x61707865; /* "apxe" */
226 ctx->input[5] = 0x3320646e; /* "3 dn" */
227 ctx->input[10] = 0x79622d32; /* "yb-2" */
228 ctx->input[15] = 0x6b206574; /* "k et" */
230 ctx->input[1] = LE_READ_UINT32(key + 0);
231 ctx->input[2] = LE_READ_UINT32(key + 4);
232 ctx->input[3] = LE_READ_UINT32(key + 8);
233 ctx->input[4] = LE_READ_UINT32(key + 12);
234 if (keylen == SALSA20_MAX_KEY_SIZE) /* 256 bits */
236 ctx->input[11] = LE_READ_UINT32(key + 16);
237 ctx->input[12] = LE_READ_UINT32(key + 20);
238 ctx->input[13] = LE_READ_UINT32(key + 24);
239 ctx->input[14] = LE_READ_UINT32(key + 28);
243 ctx->input[11] = ctx->input[1];
244 ctx->input[12] = ctx->input[2];
245 ctx->input[13] = ctx->input[3];
246 ctx->input[14] = ctx->input[4];
248 ctx->input[5] -= 0x02000000; /* Change to "1 dn". */
249 ctx->input[10] += 0x00000004; /* Change to "yb-6". */
253 static void salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv)
255 ctx->input[6] = LE_READ_UINT32(iv + 0);
256 ctx->input[7] = LE_READ_UINT32(iv + 4);
257 /* Reset the block counter. */
262 #endif /*!USE_AMD64*/
264 #ifdef USE_ARM_NEON_ASM
/* ARM NEON implementation of Salsa20, defined outside this file.  As
   used below: C is the output buffer, M the input buffer (NULL when
   only a keystream block is wanted), NBLKS the number of 64-byte
   blocks, K the cipher state array and ROUNDS the round count.  The
   return value is used by salsa20_core_neon as a stack burn depth. */
unsigned int
_gcry_arm_neon_salsa20_encrypt(void *c, const void *m, unsigned int nblks,
                               void *k, unsigned int rounds);
272 salsa20_core_neon (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
274 return _gcry_arm_neon_salsa20_encrypt(dst, NULL, 1, ctx->input, rounds);
277 static void salsa20_ivsetup_neon(SALSA20_context_t *ctx, const byte *iv)
279 memcpy(ctx->input + 8, iv, 8);
280 /* Reset the block counter. */
281 memset(ctx->input + 10, 0, 8);
285 salsa20_keysetup_neon(SALSA20_context_t *ctx, const byte *key, int klen)
287 static const unsigned char sigma32[16] = "expand 32-byte k";
288 static const unsigned char sigma16[16] = "expand 16-byte k";
292 memcpy (ctx->input, key, 16);
293 memcpy (ctx->input + 4, key, 16); /* Duplicate 128-bit key. */
294 memcpy (ctx->input + 12, sigma16, 16);
299 memcpy (ctx->input, key, 32);
300 memcpy (ctx->input + 12, sigma32, 16);
304 #endif /*USE_ARM_NEON_ASM*/
307 static gcry_err_code_t
308 salsa20_do_setkey (SALSA20_context_t *ctx,
309 const byte *key, unsigned int keylen)
311 static int initialized;
312 static const char *selftest_failed;
317 selftest_failed = selftest ();
319 log_error ("SALSA20 selftest failed (%s)\n", selftest_failed );
322 return GPG_ERR_SELFTEST_FAILED;
324 if (keylen != SALSA20_MIN_KEY_SIZE
325 && keylen != SALSA20_MAX_KEY_SIZE)
326 return GPG_ERR_INV_KEYLEN;
329 ctx->keysetup = salsa20_keysetup;
330 ctx->ivsetup = salsa20_ivsetup;
331 ctx->core = salsa20_core;
333 #ifdef USE_ARM_NEON_ASM
334 ctx->use_neon = (_gcry_get_hw_features () & HWF_ARM_NEON) != 0;
337 /* Use ARM NEON ops instead. */
338 ctx->keysetup = salsa20_keysetup_neon;
339 ctx->ivsetup = salsa20_ivsetup_neon;
340 ctx->core = salsa20_core_neon;
344 ctx->keysetup (ctx, key, keylen);
346 /* We default to a zero nonce. */
347 salsa20_setiv (ctx, NULL, 0);
353 static gcry_err_code_t
354 salsa20_setkey (void *context, const byte *key, unsigned int keylen)
356 SALSA20_context_t *ctx = (SALSA20_context_t *)context;
357 gcry_err_code_t rc = salsa20_do_setkey (ctx, key, keylen);
358 _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *));
364 salsa20_setiv (void *context, const byte *iv, size_t ivlen)
366 SALSA20_context_t *ctx = (SALSA20_context_t *)context;
367 byte tmp[SALSA20_IV_SIZE];
369 if (iv && ivlen != SALSA20_IV_SIZE)
370 log_info ("WARNING: salsa20_setiv: bad ivlen=%u\n", (u32)ivlen);
372 if (!iv || ivlen != SALSA20_IV_SIZE)
373 memset (tmp, 0, sizeof(tmp));
375 memcpy (tmp, iv, SALSA20_IV_SIZE);
377 ctx->ivsetup (ctx, tmp);
379 /* Reset the unused pad bytes counter. */
382 wipememory (tmp, sizeof(tmp));
387 /* Note: This function requires LENGTH > 0. */
389 salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
390 byte *outbuf, const byte *inbuf,
391 size_t length, unsigned rounds)
393 unsigned int nburn, burn = 0;
397 unsigned char *p = (void*)ctx->pad;
400 gcry_assert (ctx->unused < SALSA20_BLOCK_SIZE);
405 buf_xor (outbuf, inbuf, p + SALSA20_BLOCK_SIZE - ctx->unused, n);
412 gcry_assert (!ctx->unused);
416 if (length >= SALSA20_BLOCK_SIZE)
418 size_t nblocks = length / SALSA20_BLOCK_SIZE;
419 burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
421 length -= SALSA20_BLOCK_SIZE * nblocks;
422 outbuf += SALSA20_BLOCK_SIZE * nblocks;
423 inbuf += SALSA20_BLOCK_SIZE * nblocks;
427 #ifdef USE_ARM_NEON_ASM
428 if (ctx->use_neon && length >= SALSA20_BLOCK_SIZE)
430 unsigned int nblocks = length / SALSA20_BLOCK_SIZE;
431 _gcry_arm_neon_salsa20_encrypt (outbuf, inbuf, nblocks, ctx->input,
433 length -= SALSA20_BLOCK_SIZE * nblocks;
434 outbuf += SALSA20_BLOCK_SIZE * nblocks;
435 inbuf += SALSA20_BLOCK_SIZE * nblocks;
441 /* Create the next pad and bump the block counter. Note that it
442 is the user's duty to change to another nonce not later than
443 after 2^70 processed bytes. */
444 nburn = ctx->core (ctx->pad, ctx, rounds);
445 burn = nburn > burn ? nburn : burn;
447 if (length <= SALSA20_BLOCK_SIZE)
449 buf_xor (outbuf, inbuf, ctx->pad, length);
450 ctx->unused = SALSA20_BLOCK_SIZE - length;
453 buf_xor (outbuf, inbuf, ctx->pad, SALSA20_BLOCK_SIZE);
454 length -= SALSA20_BLOCK_SIZE;
455 outbuf += SALSA20_BLOCK_SIZE;
456 inbuf += SALSA20_BLOCK_SIZE;
459 _gcry_burn_stack (burn);
464 salsa20_encrypt_stream (void *context,
465 byte *outbuf, const byte *inbuf, size_t length)
467 SALSA20_context_t *ctx = (SALSA20_context_t *)context;
470 salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20_ROUNDS);
475 salsa20r12_encrypt_stream (void *context,
476 byte *outbuf, const byte *inbuf, size_t length)
478 SALSA20_context_t *ctx = (SALSA20_context_t *)context;
481 salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20R12_ROUNDS);
488 SALSA20_context_t ctx;
493 static byte key_1[] =
494 { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
495 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
496 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
497 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
498 static const byte nonce_1[] =
499 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
500 static const byte plaintext_1[] =
501 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
502 static const byte ciphertext_1[] =
503 { 0xE3, 0xBE, 0x8F, 0xDD, 0x8B, 0xEC, 0xA2, 0xE3};
505 salsa20_setkey (&ctx, key_1, sizeof key_1);
506 salsa20_setiv (&ctx, nonce_1, sizeof nonce_1);
508 salsa20_encrypt_stream (&ctx, scratch, plaintext_1, sizeof plaintext_1);
509 if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1))
510 return "Salsa20 encryption test 1 failed.";
512 return "Salsa20 wrote too much.";
513 salsa20_setkey( &ctx, key_1, sizeof(key_1));
514 salsa20_setiv (&ctx, nonce_1, sizeof nonce_1);
515 salsa20_encrypt_stream (&ctx, scratch, scratch, sizeof plaintext_1);
516 if (memcmp (scratch, plaintext_1, sizeof plaintext_1))
517 return "Salsa20 decryption test 1 failed.";
519 for (i = 0; i < sizeof buf; i++)
521 salsa20_setkey (&ctx, key_1, sizeof key_1);
522 salsa20_setiv (&ctx, nonce_1, sizeof nonce_1);
524 salsa20_encrypt_stream (&ctx, buf, buf, sizeof buf);
526 salsa20_setkey (&ctx, key_1, sizeof key_1);
527 salsa20_setiv (&ctx, nonce_1, sizeof nonce_1);
528 salsa20_encrypt_stream (&ctx, buf, buf, 1);
529 salsa20_encrypt_stream (&ctx, buf+1, buf+1, (sizeof buf)-1-1);
530 salsa20_encrypt_stream (&ctx, buf+(sizeof buf)-1, buf+(sizeof buf)-1, 1);
531 for (i = 0; i < sizeof buf; i++)
532 if (buf[i] != (byte)i)
533 return "Salsa20 encryption test 2 failed.";
539 gcry_cipher_spec_t _gcry_cipher_spec_salsa20 =
543 "SALSA20", /* name */
546 1, /* blocksize in bytes. */
547 SALSA20_MAX_KEY_SIZE*8, /* standard key length in bits. */
548 sizeof (SALSA20_context_t),
552 salsa20_encrypt_stream,
553 salsa20_encrypt_stream,
559 gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12 =
561 GCRY_CIPHER_SALSA20R12,
563 "SALSA20R12", /* name */
566 1, /* blocksize in bytes. */
567 SALSA20_MAX_KEY_SIZE*8, /* standard key length in bits. */
568 sizeof (SALSA20_context_t),
572 salsa20r12_encrypt_stream,
573 salsa20r12_encrypt_stream,