1 /* serpent.c - Implementation of the Serpent encryption algorithm.
2 * Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 * This file is part of Libgcrypt.
6 * Libgcrypt is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser general Public License as
8 * published by the Free Software Foundation; either version 2.1 of
9 * the License, or (at your option) any later version.
11 * Libgcrypt is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
32 #include "cipher-selftest.h"
35 /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
37 #if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
41 /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
43 #if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
44 # if defined(ENABLE_AVX2_SUPPORT)
49 /* USE_NEON indicates whether to enable ARM NEON assembly code. */
51 #ifdef ENABLE_NEON_SUPPORT
52 # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
53 && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
54 && defined(HAVE_GCC_INLINE_ASM_NEON)
57 #endif /*ENABLE_NEON_SUPPORT*/
59 /* Number of rounds per Serpent encrypt/decrypt operation. */
62 /* Magic number, used during generating of the subkeys. */
63 #define PHI 0x9E3779B9
65 /* Serpent works on 128 bit blocks. */
66 typedef u32 serpent_block_t[4];
68 /* Serpent key, provided by the user. If the original key is shorter
69 than 256 bits, it is padded. */
70 typedef u32 serpent_key_t[8];
72 /* The key schedule consists of 33 128 bit subkeys. */
73 typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
75 /* A Serpent context. */
76 typedef struct serpent_context
78 serpent_subkeys_t keys; /* Generated subkeys. */
90 /* Assembler implementations of Serpent using SSE2. Process 8 blocks in
93 extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
95 const unsigned char *in,
98 extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
100 const unsigned char *in,
103 extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
105 const unsigned char *in,
110 /* Assembler implementations of Serpent using AVX2. Process 16 blocks in
113 extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
115 const unsigned char *in,
118 extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
120 const unsigned char *in,
123 extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
125 const unsigned char *in,
130 /* Assembler implementations of Serpent using ARM NEON. Process 8 blocks in
133 extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
135 const unsigned char *in,
138 extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
140 const unsigned char *in,
143 extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
145 const unsigned char *in,
151 static const char *serpent_test (void);
155 * These are the S-Boxes of Serpent from the following research paper.
157 * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
158 * (New York, New York, USA), p. 317–329, National Institute of Standards and
161 * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
165 #define SBOX0(r0, r1, r2, r3, w, x, y, z) \
170 r1 &= r3; r4 ^= r2; \
171 r1 ^= r0; r0 |= r3; \
172 r0 ^= r4; r4 ^= r3; \
173 r3 ^= r2; r2 |= r1; \
174 r2 ^= r4; r4 = ~r4; \
175 r4 |= r1; r1 ^= r3; \
176 r1 ^= r4; r3 |= r0; \
177 r1 ^= r3; r4 ^= r3; \
179 w = r1; x = r4; y = r2; z = r0; \
182 #define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
187 r1 |= r0; r4 = ~r4; \
188 r1 ^= r2; r2 |= r4; \
189 r1 ^= r3; r0 ^= r4; \
190 r2 ^= r0; r0 &= r3; \
191 r4 ^= r0; r0 |= r1; \
192 r0 ^= r2; r3 ^= r4; \
193 r2 ^= r1; r3 ^= r0; \
198 w = r0; x = r4; y = r1; z = r3; \
201 #define SBOX1(r0, r1, r2, r3, w, x, y, z) \
205 r0 = ~r0; r2 = ~r2; \
207 r2 ^= r0; r0 |= r3; \
208 r3 ^= r2; r1 ^= r0; \
209 r0 ^= r4; r4 |= r1; \
210 r1 ^= r3; r2 |= r0; \
211 r2 &= r4; r0 ^= r1; \
213 r1 ^= r0; r0 &= r2; \
216 w = r2; x = r0; y = r3; z = r1; \
219 #define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
224 r3 &= r1; r4 ^= r2; \
225 r3 ^= r0; r0 |= r1; \
226 r2 ^= r3; r0 ^= r4; \
227 r0 |= r2; r1 ^= r3; \
228 r0 ^= r1; r1 |= r3; \
229 r1 ^= r0; r4 = ~r4; \
230 r4 ^= r1; r1 |= r0; \
235 w = r4; x = r0; y = r3; z = r2; \
238 #define SBOX2(r0, r1, r2, r3, w, x, y, z) \
243 r0 ^= r3; r2 ^= r1; \
244 r2 ^= r0; r3 |= r4; \
245 r3 ^= r1; r4 ^= r2; \
247 r3 ^= r0; r0 &= r1; \
248 r4 ^= r0; r1 ^= r3; \
249 r1 ^= r4; r4 = ~r4; \
251 w = r2; x = r3; y = r1; z = r4; \
254 #define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
258 r2 ^= r3; r3 ^= r0; \
260 r3 ^= r1; r1 |= r2; \
261 r1 ^= r4; r4 &= r3; \
262 r2 ^= r3; r4 &= r0; \
263 r4 ^= r2; r2 &= r1; \
264 r2 |= r0; r3 = ~r3; \
265 r2 ^= r3; r0 ^= r3; \
266 r0 &= r1; r3 ^= r4; \
269 w = r1; x = r4; y = r2; z = r3; \
272 #define SBOX3(r0, r1, r2, r3, w, x, y, z) \
277 r3 ^= r1; r1 &= r4; \
278 r4 ^= r2; r2 ^= r3; \
279 r3 &= r0; r4 |= r1; \
280 r3 ^= r4; r0 ^= r1; \
281 r4 &= r0; r1 ^= r3; \
282 r4 ^= r2; r1 |= r0; \
283 r1 ^= r2; r0 ^= r3; \
287 w = r1; x = r2; y = r3; z = r4; \
290 #define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
295 r0 ^= r2; r4 &= r2; \
296 r4 ^= r0; r0 &= r1; \
297 r1 ^= r3; r3 |= r4; \
298 r2 ^= r3; r0 ^= r3; \
299 r1 ^= r4; r3 &= r2; \
300 r3 ^= r1; r1 ^= r0; \
301 r1 |= r2; r0 ^= r3; \
305 w = r2; x = r1; y = r3; z = r0; \
308 #define SBOX4(r0, r1, r2, r3, w, x, y, z) \
312 r1 ^= r3; r3 = ~r3; \
313 r2 ^= r3; r3 ^= r0; \
315 r1 ^= r2; r4 ^= r3; \
316 r0 ^= r4; r2 &= r4; \
317 r2 ^= r0; r0 &= r1; \
318 r3 ^= r0; r4 |= r1; \
319 r4 ^= r0; r0 |= r3; \
320 r0 ^= r2; r2 &= r3; \
321 r0 = ~r0; r4 ^= r2; \
323 w = r1; x = r4; y = r0; z = r3; \
326 #define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
331 r2 ^= r1; r1 |= r3; \
332 r1 &= r0; r4 ^= r2; \
333 r4 ^= r1; r1 &= r2; \
334 r0 = ~r0; r3 ^= r4; \
335 r1 ^= r3; r3 &= r0; \
336 r3 ^= r2; r0 ^= r1; \
337 r2 &= r0; r3 ^= r0; \
339 r2 |= r3; r3 ^= r0; \
342 w = r0; x = r3; y = r2; z = r4; \
345 #define SBOX5(r0, r1, r2, r3, w, x, y, z) \
349 r0 ^= r1; r1 ^= r3; \
351 r1 &= r0; r2 ^= r3; \
352 r1 ^= r2; r2 |= r4; \
353 r4 ^= r3; r3 &= r1; \
354 r3 ^= r0; r4 ^= r1; \
355 r4 ^= r2; r2 ^= r0; \
356 r0 &= r3; r2 = ~r2; \
357 r0 ^= r4; r4 |= r3; \
360 w = r1; x = r3; y = r0; z = r2; \
363 #define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
368 r2 ^= r1; r3 |= r0; \
369 r3 ^= r2; r2 |= r1; \
370 r2 &= r0; r4 ^= r3; \
371 r2 ^= r4; r4 |= r0; \
372 r4 ^= r1; r1 &= r2; \
373 r1 ^= r3; r4 ^= r2; \
374 r3 &= r4; r4 ^= r1; \
375 r3 ^= r4; r4 = ~r4; \
378 w = r1; x = r4; y = r3; z = r2; \
381 #define SBOX6(r0, r1, r2, r3, w, x, y, z) \
386 r3 &= r0; r0 ^= r4; \
387 r3 ^= r2; r2 |= r4; \
388 r1 ^= r3; r2 ^= r0; \
389 r0 |= r1; r2 ^= r1; \
390 r4 ^= r0; r0 |= r3; \
391 r0 ^= r2; r4 ^= r3; \
392 r4 ^= r0; r3 = ~r3; \
396 w = r0; x = r1; y = r4; z = r2; \
399 #define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
404 r2 &= r0; r4 ^= r3; \
405 r2 = ~r2; r3 ^= r1; \
406 r2 ^= r3; r4 |= r0; \
407 r0 ^= r2; r3 ^= r4; \
408 r4 ^= r1; r1 &= r3; \
409 r1 ^= r0; r0 ^= r3; \
410 r0 |= r2; r3 ^= r1; \
413 w = r1; x = r2; y = r4; z = r3; \
416 #define SBOX7(r0, r1, r2, r3, w, x, y, z) \
421 r1 ^= r3; r4 ^= r2; \
422 r2 ^= r1; r3 |= r4; \
423 r3 &= r0; r4 ^= r2; \
424 r3 ^= r1; r1 |= r4; \
425 r1 ^= r0; r0 |= r4; \
426 r0 ^= r2; r1 ^= r4; \
427 r2 ^= r1; r1 &= r0; \
428 r1 ^= r4; r2 = ~r2; \
432 w = r4; x = r3; y = r1; z = r0; \
435 #define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
440 r0 &= r3; r4 |= r3; \
441 r2 = ~r2; r3 ^= r1; \
442 r1 |= r0; r0 ^= r2; \
443 r2 &= r4; r3 &= r4; \
444 r1 ^= r2; r2 ^= r0; \
445 r0 |= r2; r4 ^= r1; \
446 r0 ^= r3; r3 ^= r4; \
447 r4 |= r0; r3 ^= r2; \
450 w = r3; x = r0; y = r1; z = r4; \
453 /* XOR BLOCK1 into BLOCK0. */
454 #define BLOCK_XOR(block0, block1) \
456 block0[0] ^= block1[0]; \
457 block0[1] ^= block1[1]; \
458 block0[2] ^= block1[2]; \
459 block0[3] ^= block1[3]; \
462 /* Copy BLOCK_SRC to BLOCK_DST. */
463 #define BLOCK_COPY(block_dst, block_src) \
465 block_dst[0] = block_src[0]; \
466 block_dst[1] = block_src[1]; \
467 block_dst[2] = block_src[2]; \
468 block_dst[3] = block_src[3]; \
/* Apply SBOX number WHICH to the block found in ARRAY0, writing the
   output to the block found in ARRAY1.

   WHICH is token-pasted onto `SBOX', so it has to be the literal number
   of an existing SBOXn macro.  ARRAY0 and ARRAY1 are each indexed 0..3.
   The expansion deliberately carries no trailing semicolon; the call
   site supplies it, which avoids a stray empty statement after every
   use.  */
#define SBOX(which, array0, array1)                                   \
  SBOX##which ((array0)[0], (array0)[1], (array0)[2], (array0)[3],    \
               (array1)[0], (array1)[1], (array1)[2], (array1)[3])
/* Apply inverse SBOX number WHICH to the block found in ARRAY0, writing
   the output to the block found in ARRAY1.

   WHICH is token-pasted between `SBOX' and `_INVERSE', so it has to be
   the literal number of an existing SBOXn_INVERSE macro.  ARRAY0 and
   ARRAY1 are each indexed 0..3.  The expansion deliberately carries no
   trailing semicolon; the call site supplies it.  */
#define SBOX_INVERSE(which, array0, array1)                               \
  SBOX##which##_INVERSE ((array0)[0], (array0)[1], (array0)[2], (array0)[3], \
                         (array1)[0], (array1)[1], (array1)[2], (array1)[3])
483 /* Apply the linear transformation to BLOCK. */
484 #define LINEAR_TRANSFORMATION(block) \
486 block[0] = rol (block[0], 13); \
487 block[2] = rol (block[2], 3); \
488 block[1] = block[1] ^ block[0] ^ block[2]; \
489 block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
490 block[1] = rol (block[1], 1); \
491 block[3] = rol (block[3], 7); \
492 block[0] = block[0] ^ block[1] ^ block[3]; \
493 block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
494 block[0] = rol (block[0], 5); \
495 block[2] = rol (block[2], 22); \
498 /* Apply the inverse linear transformation to BLOCK. */
499 #define LINEAR_TRANSFORMATION_INVERSE(block) \
501 block[2] = ror (block[2], 22); \
502 block[0] = ror (block[0] , 5); \
503 block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
504 block[0] = block[0] ^ block[1] ^ block[3]; \
505 block[3] = ror (block[3], 7); \
506 block[1] = ror (block[1], 1); \
507 block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
508 block[1] = block[1] ^ block[0] ^ block[2]; \
509 block[2] = ror (block[2], 3); \
510 block[0] = ror (block[0], 13); \
513 /* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the
514 subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary storage.
515 This macro increments `round'. */
516 #define ROUND(which, subkeys, block, block_tmp) \
518 BLOCK_XOR (block, subkeys[round]); \
520 SBOX (which, block, block_tmp); \
521 LINEAR_TRANSFORMATION (block_tmp); \
522 BLOCK_COPY (block, block_tmp); \
525 /* Apply the last Serpent round to BLOCK, using the SBOX number WHICH
526 and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary
527 storage. The result will be stored in BLOCK_TMP. This macro
528 increments `round'. */
529 #define ROUND_LAST(which, subkeys, block, block_tmp) \
531 BLOCK_XOR (block, subkeys[round]); \
533 SBOX (which, block, block_tmp); \
534 BLOCK_XOR (block_tmp, subkeys[round]); \
538 /* Apply an inverse Serpent round to BLOCK, using the SBOX number
539 WHICH and the subkeys contained in SUBKEYS. Use BLOCK_TMP as
540 temporary storage. This macro decrements `round'. */
541 #define ROUND_INVERSE(which, subkey, block, block_tmp) \
543 LINEAR_TRANSFORMATION_INVERSE (block); \
544 SBOX_INVERSE (which, block, block_tmp); \
545 BLOCK_XOR (block_tmp, subkey[round]); \
547 BLOCK_COPY (block, block_tmp); \
550 /* Apply the first Serpent round to BLOCK, using the SBOX number WHICH
551 and the subkeys contained in SUBKEYS. Use BLOCK_TMP as temporary
552 storage. The result will be stored in BLOCK_TMP. This macro
553 decrements `round'. */
554 #define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \
556 BLOCK_XOR (block, subkeys[round]); \
558 SBOX_INVERSE (which, block, block_tmp); \
559 BLOCK_XOR (block_tmp, subkeys[round]); \
563 /* Convert the user provided key KEY of KEY_LENGTH bytes into the
564 internally used format. */
566 serpent_key_prepare (const byte *key, unsigned int key_length,
567 serpent_key_t key_prepared)
573 for (i = 0; i < key_length; i++)
574 key_prepared[i] = buf_get_le32 (key + i * 4);
578 /* Key must be padded according to the Serpent
580 key_prepared[i] = 0x00000001;
582 for (i++; i < 8; i++)
587 /* Derive the 33 subkeys from KEY and store them in SUBKEYS. */
589 serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
591 u32 w[8]; /* The `prekey'. */
595 /* Initialize with key values. */
/* Expand to intermediate key using the affine recurrence.

   Produces the four prekey words with indices R .. R+3.  For each
   index k the new word is the XOR of the words at positions k, k+3,
   k+5 and k+7 (all modulo 8) of the eight-word window `w', of PHI and
   of k itself, rotated left by 11 bits.  The result replaces w[k % 8]
   and is also stored in WO[0..3].  The statements are order dependent:
   later words read the values written by earlier ones.

   Relies on `w', `rol' and PHI being in scope at the point of use.
   Wrapped in do/while (0) so that the expansion is a single statement;
   the call site supplies the terminating semicolon.  */
#define EXPAND_KEY4(wo, r)                                              \
  do                                                                    \
    {                                                                   \
      (wo)[0] = w[((r)+0)%8] =                                          \
        rol (w[((r)+0)%8] ^ w[((r)+3)%8] ^ w[((r)+5)%8] ^ w[((r)+7)%8]  \
             ^ PHI ^ ((r)+0), 11);                                      \
      (wo)[1] = w[((r)+1)%8] =                                          \
        rol (w[((r)+1)%8] ^ w[((r)+4)%8] ^ w[((r)+6)%8] ^ w[((r)+0)%8]  \
             ^ PHI ^ ((r)+1), 11);                                      \
      (wo)[2] = w[((r)+2)%8] =                                          \
        rol (w[((r)+2)%8] ^ w[((r)+5)%8] ^ w[((r)+7)%8] ^ w[((r)+1)%8]  \
             ^ PHI ^ ((r)+2), 11);                                      \
      (wo)[3] = w[((r)+3)%8] =                                          \
        rol (w[((r)+3)%8] ^ w[((r)+6)%8] ^ w[((r)+0)%8] ^ w[((r)+2)%8]  \
             ^ PHI ^ ((r)+3), 11);                                      \
    }                                                                   \
  while (0)
/* Expand eight prekey words in one step: the words with indices
   R .. R+3 are written to `ws' and the words R+4 .. R+7 to `wt'.
   Relies on `ws', `wt' and EXPAND_KEY4 being in scope at the point of
   use.  Wrapped in do/while (0) so that the expansion is a single
   statement; the call site supplies the terminating semicolon.  */
#define EXPAND_KEY(r)                   \
  do                                    \
    {                                   \
      EXPAND_KEY4(ws, (r));             \
      EXPAND_KEY4(wt, ((r) + 4));       \
    }                                   \
  while (0)
620 /* Calculate subkeys via S-Boxes, in bitslice mode. */
621 EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
622 EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
623 EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
624 EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
625 EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
626 EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
627 EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
628 EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
629 EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
630 EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
631 EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
632 EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
633 EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
634 EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
635 EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
636 EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
637 EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);
639 wipememory (ws, sizeof (ws));
640 wipememory (wt, sizeof (wt));
641 wipememory (w, sizeof (w));
644 /* Initialize CONTEXT with the key KEY of KEY_LENGTH bytes. */
646 serpent_setkey_internal (serpent_context_t *context,
647 const byte *key, unsigned int key_length)
649 serpent_key_t key_prepared;
651 serpent_key_prepare (key, key_length, key_prepared);
652 serpent_subkeys_generate (key_prepared, context->keys);
655 context->use_avx2 = 0;
656 if ((_gcry_get_hw_features () & HWF_INTEL_AVX2))
658 context->use_avx2 = 1;
663 context->use_neon = 0;
664 if ((_gcry_get_hw_features () & HWF_ARM_NEON))
666 context->use_neon = 1;
670 wipememory (key_prepared, sizeof(key_prepared));
673 /* Initialize CTX with the key KEY of KEY_LENGTH bytes. */
674 static gcry_err_code_t
675 serpent_setkey (void *ctx,
676 const byte *key, unsigned int key_length)
678 serpent_context_t *context = ctx;
679 static const char *serpent_test_ret;
680 static int serpent_init_done;
681 gcry_err_code_t ret = GPG_ERR_NO_ERROR;
683 if (! serpent_init_done)
685 /* Execute a self-test the first time Serpent is used. */
686 serpent_init_done = 1;
687 serpent_test_ret = serpent_test ();
688 if (serpent_test_ret)
689 log_error ("Serpent test failure: %s\n", serpent_test_ret);
692 if (serpent_test_ret)
693 ret = GPG_ERR_SELFTEST_FAILED;
695 serpent_setkey_internal (context, key, key_length);
701 serpent_encrypt_internal (serpent_context_t *context,
702 const byte *input, byte *output)
704 serpent_block_t b, b_next;
707 b[0] = buf_get_le32 (input + 0);
708 b[1] = buf_get_le32 (input + 4);
709 b[2] = buf_get_le32 (input + 8);
710 b[3] = buf_get_le32 (input + 12);
712 ROUND (0, context->keys, b, b_next);
713 ROUND (1, context->keys, b, b_next);
714 ROUND (2, context->keys, b, b_next);
715 ROUND (3, context->keys, b, b_next);
716 ROUND (4, context->keys, b, b_next);
717 ROUND (5, context->keys, b, b_next);
718 ROUND (6, context->keys, b, b_next);
719 ROUND (7, context->keys, b, b_next);
720 ROUND (0, context->keys, b, b_next);
721 ROUND (1, context->keys, b, b_next);
722 ROUND (2, context->keys, b, b_next);
723 ROUND (3, context->keys, b, b_next);
724 ROUND (4, context->keys, b, b_next);
725 ROUND (5, context->keys, b, b_next);
726 ROUND (6, context->keys, b, b_next);
727 ROUND (7, context->keys, b, b_next);
728 ROUND (0, context->keys, b, b_next);
729 ROUND (1, context->keys, b, b_next);
730 ROUND (2, context->keys, b, b_next);
731 ROUND (3, context->keys, b, b_next);
732 ROUND (4, context->keys, b, b_next);
733 ROUND (5, context->keys, b, b_next);
734 ROUND (6, context->keys, b, b_next);
735 ROUND (7, context->keys, b, b_next);
736 ROUND (0, context->keys, b, b_next);
737 ROUND (1, context->keys, b, b_next);
738 ROUND (2, context->keys, b, b_next);
739 ROUND (3, context->keys, b, b_next);
740 ROUND (4, context->keys, b, b_next);
741 ROUND (5, context->keys, b, b_next);
742 ROUND (6, context->keys, b, b_next);
744 ROUND_LAST (7, context->keys, b, b_next);
746 buf_put_le32 (output + 0, b_next[0]);
747 buf_put_le32 (output + 4, b_next[1]);
748 buf_put_le32 (output + 8, b_next[2]);
749 buf_put_le32 (output + 12, b_next[3]);
753 serpent_decrypt_internal (serpent_context_t *context,
754 const byte *input, byte *output)
756 serpent_block_t b, b_next;
759 b_next[0] = buf_get_le32 (input + 0);
760 b_next[1] = buf_get_le32 (input + 4);
761 b_next[2] = buf_get_le32 (input + 8);
762 b_next[3] = buf_get_le32 (input + 12);
764 ROUND_FIRST_INVERSE (7, context->keys, b_next, b);
766 ROUND_INVERSE (6, context->keys, b, b_next);
767 ROUND_INVERSE (5, context->keys, b, b_next);
768 ROUND_INVERSE (4, context->keys, b, b_next);
769 ROUND_INVERSE (3, context->keys, b, b_next);
770 ROUND_INVERSE (2, context->keys, b, b_next);
771 ROUND_INVERSE (1, context->keys, b, b_next);
772 ROUND_INVERSE (0, context->keys, b, b_next);
773 ROUND_INVERSE (7, context->keys, b, b_next);
774 ROUND_INVERSE (6, context->keys, b, b_next);
775 ROUND_INVERSE (5, context->keys, b, b_next);
776 ROUND_INVERSE (4, context->keys, b, b_next);
777 ROUND_INVERSE (3, context->keys, b, b_next);
778 ROUND_INVERSE (2, context->keys, b, b_next);
779 ROUND_INVERSE (1, context->keys, b, b_next);
780 ROUND_INVERSE (0, context->keys, b, b_next);
781 ROUND_INVERSE (7, context->keys, b, b_next);
782 ROUND_INVERSE (6, context->keys, b, b_next);
783 ROUND_INVERSE (5, context->keys, b, b_next);
784 ROUND_INVERSE (4, context->keys, b, b_next);
785 ROUND_INVERSE (3, context->keys, b, b_next);
786 ROUND_INVERSE (2, context->keys, b, b_next);
787 ROUND_INVERSE (1, context->keys, b, b_next);
788 ROUND_INVERSE (0, context->keys, b, b_next);
789 ROUND_INVERSE (7, context->keys, b, b_next);
790 ROUND_INVERSE (6, context->keys, b, b_next);
791 ROUND_INVERSE (5, context->keys, b, b_next);
792 ROUND_INVERSE (4, context->keys, b, b_next);
793 ROUND_INVERSE (3, context->keys, b, b_next);
794 ROUND_INVERSE (2, context->keys, b, b_next);
795 ROUND_INVERSE (1, context->keys, b, b_next);
796 ROUND_INVERSE (0, context->keys, b, b_next);
798 buf_put_le32 (output + 0, b_next[0]);
799 buf_put_le32 (output + 4, b_next[1]);
800 buf_put_le32 (output + 8, b_next[2]);
801 buf_put_le32 (output + 12, b_next[3]);
805 serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
807 serpent_context_t *context = ctx;
809 serpent_encrypt_internal (context, buffer_in, buffer_out);
810 return /*burn_stack*/ (2 * sizeof (serpent_block_t));
814 serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
816 serpent_context_t *context = ctx;
818 serpent_decrypt_internal (context, buffer_in, buffer_out);
819 return /*burn_stack*/ (2 * sizeof (serpent_block_t));
824 /* Bulk encryption of complete blocks in CTR mode. This function is only
825 intended for the bulk encryption feature of cipher.c. CTR is expected to be
826 of size sizeof(serpent_block_t). */
828 _gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
829 void *outbuf_arg, const void *inbuf_arg,
832 serpent_context_t *ctx = context;
833 unsigned char *outbuf = outbuf_arg;
834 const unsigned char *inbuf = inbuf_arg;
835 unsigned char tmpbuf[sizeof(serpent_block_t)];
836 int burn_stack_depth = 2 * sizeof (serpent_block_t);
842 int did_use_avx2 = 0;
844 /* Process data in 16 block chunks. */
845 while (nblocks >= 16)
847 _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
850 outbuf += 16 * sizeof(serpent_block_t);
851 inbuf += 16 * sizeof(serpent_block_t);
857 /* serpent-avx2 assembly code does not use stack */
859 burn_stack_depth = 0;
862 /* Use generic/sse2 code to handle smaller chunks... */
863 /* TODO: use caching instead? */
869 int did_use_sse2 = 0;
871 /* Process data in 8 block chunks. */
874 _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);
877 outbuf += 8 * sizeof(serpent_block_t);
878 inbuf += 8 * sizeof(serpent_block_t);
884 /* serpent-sse2 assembly code does not use stack */
886 burn_stack_depth = 0;
889 /* Use generic code to handle smaller chunks... */
890 /* TODO: use caching instead? */
897 int did_use_neon = 0;
899 /* Process data in 8 block chunks. */
902 _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);
905 outbuf += 8 * sizeof(serpent_block_t);
906 inbuf += 8 * sizeof(serpent_block_t);
912 /* serpent-neon assembly code does not use stack */
914 burn_stack_depth = 0;
917 /* Use generic code to handle smaller chunks... */
918 /* TODO: use caching instead? */
922 for ( ;nblocks; nblocks-- )
924 /* Encrypt the counter. */
925 serpent_encrypt_internal(ctx, ctr, tmpbuf);
926 /* XOR the input with the encrypted counter and store in output. */
927 buf_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
928 outbuf += sizeof(serpent_block_t);
929 inbuf += sizeof(serpent_block_t);
930 /* Increment the counter. */
931 for (i = sizeof(serpent_block_t); i > 0; i--)
939 wipememory(tmpbuf, sizeof(tmpbuf));
940 _gcry_burn_stack(burn_stack_depth);
943 /* Bulk decryption of complete blocks in CBC mode. This function is only
944 intended for the bulk encryption feature of cipher.c. */
946 _gcry_serpent_cbc_dec(void *context, unsigned char *iv,
947 void *outbuf_arg, const void *inbuf_arg,
950 serpent_context_t *ctx = context;
951 unsigned char *outbuf = outbuf_arg;
952 const unsigned char *inbuf = inbuf_arg;
953 unsigned char savebuf[sizeof(serpent_block_t)];
954 int burn_stack_depth = 2 * sizeof (serpent_block_t);
959 int did_use_avx2 = 0;
961 /* Process data in 16 block chunks. */
962 while (nblocks >= 16)
964 _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
967 outbuf += 16 * sizeof(serpent_block_t);
968 inbuf += 16 * sizeof(serpent_block_t);
974 /* serpent-avx2 assembly code does not use stack */
976 burn_stack_depth = 0;
979 /* Use generic/sse2 code to handle smaller chunks... */
985 int did_use_sse2 = 0;
987 /* Process data in 8 block chunks. */
990 _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv);
993 outbuf += 8 * sizeof(serpent_block_t);
994 inbuf += 8 * sizeof(serpent_block_t);
1000 /* serpent-sse2 assembly code does not use stack */
1002 burn_stack_depth = 0;
1005 /* Use generic code to handle smaller chunks... */
1012 int did_use_neon = 0;
1014 /* Process data in 8 block chunks. */
1015 while (nblocks >= 8)
1017 _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv);
1020 outbuf += 8 * sizeof(serpent_block_t);
1021 inbuf += 8 * sizeof(serpent_block_t);
1027 /* serpent-neon assembly code does not use stack */
1029 burn_stack_depth = 0;
1032 /* Use generic code to handle smaller chunks... */
1036 for ( ;nblocks; nblocks-- )
1038 /* INBUF is needed later and it may be identical to OUTBUF, so store
1039 the intermediate result to SAVEBUF. */
1040 serpent_decrypt_internal (ctx, inbuf, savebuf);
1042 buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, sizeof(serpent_block_t));
1043 inbuf += sizeof(serpent_block_t);
1044 outbuf += sizeof(serpent_block_t);
1047 wipememory(savebuf, sizeof(savebuf));
1048 _gcry_burn_stack(burn_stack_depth);
1051 /* Bulk decryption of complete blocks in CFB mode. This function is only
1052 intended for the bulk encryption feature of cipher.c. */
1054 _gcry_serpent_cfb_dec(void *context, unsigned char *iv,
1055 void *outbuf_arg, const void *inbuf_arg,
1058 serpent_context_t *ctx = context;
1059 unsigned char *outbuf = outbuf_arg;
1060 const unsigned char *inbuf = inbuf_arg;
1061 int burn_stack_depth = 2 * sizeof (serpent_block_t);
1066 int did_use_avx2 = 0;
1068 /* Process data in 16 block chunks. */
1069 while (nblocks >= 16)
1071 _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
1074 outbuf += 16 * sizeof(serpent_block_t);
1075 inbuf += 16 * sizeof(serpent_block_t);
1081 /* serpent-avx2 assembly code does not use stack */
1083 burn_stack_depth = 0;
1086 /* Use generic/sse2 code to handle smaller chunks... */
1092 int did_use_sse2 = 0;
1094 /* Process data in 8 block chunks. */
1095 while (nblocks >= 8)
1097 _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv);
1100 outbuf += 8 * sizeof(serpent_block_t);
1101 inbuf += 8 * sizeof(serpent_block_t);
1107 /* serpent-sse2 assembly code does not use stack */
1109 burn_stack_depth = 0;
1112 /* Use generic code to handle smaller chunks... */
1119 int did_use_neon = 0;
1121 /* Process data in 8 block chunks. */
1122 while (nblocks >= 8)
1124 _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv);
1127 outbuf += 8 * sizeof(serpent_block_t);
1128 inbuf += 8 * sizeof(serpent_block_t);
1134 /* serpent-neon assembly code does not use stack */
1136 burn_stack_depth = 0;
1139 /* Use generic code to handle smaller chunks... */
1143 for ( ;nblocks; nblocks-- )
1145 serpent_encrypt_internal(ctx, iv, iv);
1146 buf_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
1147 outbuf += sizeof(serpent_block_t);
1148 inbuf += sizeof(serpent_block_t);
1151 _gcry_burn_stack(burn_stack_depth);
1156 /* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR
1157 encryption. Returns NULL on success. */
1159 selftest_ctr_128 (void)
1161 const int nblocks = 16+8+1;
1162 const int blocksize = sizeof(serpent_block_t);
1163 const int context_size = sizeof(serpent_context_t);
1165 return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey,
1166 &serpent_encrypt, &_gcry_serpent_ctr_enc, nblocks, blocksize,
1171 /* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
1172 Returns NULL on success. */
1174 selftest_cbc_128 (void)
1176 const int nblocks = 16+8+2;
1177 const int blocksize = sizeof(serpent_block_t);
1178 const int context_size = sizeof(serpent_context_t);
1180 return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey,
1181 &serpent_encrypt, &_gcry_serpent_cbc_dec, nblocks, blocksize,
1186 /* Run the self-tests for SERPENT-CFB-128, tests bulk CFB decryption.
1187 Returns NULL on success. */
1189 selftest_cfb_128 (void)
1191 const int nblocks = 16+8+2;
1192 const int blocksize = sizeof(serpent_block_t);
1193 const int context_size = sizeof(serpent_context_t);
1195 return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey,
1196 &serpent_encrypt, &_gcry_serpent_cfb_dec, nblocks, blocksize,
1206 serpent_context_t context;
1207 unsigned char scratch[16];
1214 unsigned char key[32];
1215 unsigned char text_plain[16];
1216 unsigned char text_cipher[16];
1221 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
1222 "\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E",
1223 "\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D"
1227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
1228 "\x00\x00\x00\x00\x00\x00\x00\x00",
1229 "\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E",
1230 "\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9"
1234 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
1235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
1236 "\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E",
1237 "\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B"
1241 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
1242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
1243 "\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00",
1244 "\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C"
1251 for (i = 0; test_data[i].key_length; i++)
1253 serpent_setkey_internal (&context, test_data[i].key,
1254 test_data[i].key_length);
1255 serpent_encrypt_internal (&context, test_data[i].text_plain, scratch);
1257 if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t)))
1258 switch (test_data[i].key_length)
1261 return "Serpent-128 test encryption failed.";
1263 return "Serpent-192 test encryption failed.";
1265 return "Serpent-256 test encryption failed.";
1268 serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch);
1269 if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t)))
1270 switch (test_data[i].key_length)
1273 return "Serpent-128 test decryption failed.";
1275 return "Serpent-192 test decryption failed.";
1277 return "Serpent-256 test decryption failed.";
1281 if ( (r = selftest_ctr_128 ()) )
1284 if ( (r = selftest_cbc_128 ()) )
1287 if ( (r = selftest_cfb_128 ()) )
1295 /* "SERPENT" is an alias for "SERPENT128". */
1296 static const char *cipher_spec_serpent128_aliases[] =
1302 gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
1304 GCRY_CIPHER_SERPENT128, {0, 0},
1305 "SERPENT128", cipher_spec_serpent128_aliases, NULL, 16, 128,
1306 sizeof (serpent_context_t),
1307 serpent_setkey, serpent_encrypt, serpent_decrypt
1310 gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
1312 GCRY_CIPHER_SERPENT192, {0, 0},
1313 "SERPENT192", NULL, NULL, 16, 192,
1314 sizeof (serpent_context_t),
1315 serpent_setkey, serpent_encrypt, serpent_decrypt
1318 gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
1320 GCRY_CIPHER_SERPENT256, {0, 0},
1321 "SERPENT256", NULL, NULL, 16, 256,
1322 sizeof (serpent_context_t),
1323 serpent_setkey, serpent_encrypt, serpent_decrypt