1 C x86_64/serpent-decrypt.asm
4 Copyright (C) 2011 Niels Möller
6 This file is part of GNU Nettle.
8 GNU Nettle is free software: you can redistribute it and/or
9 modify it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free
18 Software Foundation; either version 2 of the License, or (at your
19 option) any later version.
21 or both in parallel, as here.
23 GNU Nettle is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received copies of the GNU General Public License and
29 the GNU Lesser General Public License along with this program. If
30 not, see http://www.gnu.org/licenses/.
33 include_src(<x86_64/serpent.m4>)
37 C Single block serpent state, two copies
48 C Quadruple block serpent state, two copies
C Register aliases used by the decryption code.
C MINUS1 names %xmm8. nettle_serpent_decrypt executes pcmpeqd MINUS1, MINUS1,
C which compares the register with itself and so sets every bit to one.
C NOTE(review): presumably kept as an all-ones mask for complementing
C vectors with pxor; the macros that consume it are not in view, confirm.
59 define(<MINUS1>, <%xmm8>)
C T1, T2 and T3 name %xmm10, %xmm11 and %xmm12.
C NOTE(review): the names suggest scratch vectors for the wide S-box
C macros, but no use of them is visible here; confirm against the macro
C definitions and x86_64/serpent.m4.
61 define(<T1>, <%xmm10>)
62 define(<T2>, <%xmm11>)
63 define(<T3>, <%xmm12>)
C TMP32 names %r14d, the low 32 bits of %r14. %r14 is callee-saved in both
C the System V AMD64 and Microsoft x64 calling conventions, so its value
C must be preserved for the caller.
C NOTE(review): the save and restore are not in view; verify they exist.
72 define(<TMP32>, <%r14d>)
74 C SBOX macros. Inputs $1 - $4 (destroyed), outputs $5 - $8
532 .file "serpent-decrypt.asm"
534 C serpent_decrypt(struct serpent_context *ctx,
535 C size_t length, uint8_t *dst,
536 C const uint8_t *src)
539 PROLOGUE(nettle_serpent_decrypt)
540 C save all registers that need to be saved
556 pcmpeqd MINUS1, MINUS1
560 movups 16(SRC, N), X1
561 movups 32(SRC, N), X2
562 movups 48(SRC, N), X3
564 WTRANSPOSE(X0,X1,X2,X3)
568 C FIXME: CNT known, no index register needed
569 WKEYXOR(128, X0,X1,X2,X3)
578 WSBOX7I(X0,X1,X2,X3, Y0,Y1,Y2,Y3)
579 WKEYXOR(112, Y0,Y1,Y2,Y3)
582 WSBOX6I(Y0,Y1,Y2,Y3, X0,X1,X2,X3)
583 WKEYXOR(96, X0,X1,X2,X3)
586 WSBOX5I(X0,X1,X2,X3, Y0,Y1,Y2,Y3)
587 WKEYXOR(80, Y0,Y1,Y2,Y3)
590 WSBOX4I(Y0,Y1,Y2,Y3, X0,X1,X2,X3)
591 WKEYXOR(64, X0,X1,X2,X3)
594 WSBOX3I(X0,X1,X2,X3, Y0,Y1,Y2,Y3)
595 WKEYXOR(48, Y0,Y1,Y2,Y3)
598 WSBOX2I(Y0,Y1,Y2,Y3, X0,X1,X2,X3)
599 WKEYXOR(32, X0,X1,X2,X3)
602 WSBOX1I(X0,X1,X2,X3, Y0,Y1,Y2,Y3)
603 WKEYXOR(16, Y0,Y1,Y2,Y3)
606 WSBOX0I(Y0,Y1,Y2,Y3, X0,X1,X2,X3)
607 WKEYXOR(, X0,X1,X2,X3)
612 WTRANSPOSE(X0,X1,X2,X3)
615 movups X1, 16(DST, N)
616 movups X2, 32(DST, N)
617 movups X3, 48(DST, N)
619 C FIXME: Adjust N, so we can use just jnc without an extra cmp.
644 SBOX7I(x0,x1,x2,x3, y0,y1,y2,y3)
645 xor 112(CTX, CNT), y0
646 xor 116(CTX, CNT), y1
647 xor 120(CTX, CNT), y2
648 xor 124(CTX, CNT), y3
651 SBOX6I(y0,y1,y2,y3, x0,x1,x2,x3)
653 xor 100(CTX, CNT), x1
654 xor 104(CTX, CNT), x2
655 xor 108(CTX, CNT), x3
658 SBOX5I(x0,x1,x2,x3, y0,y1,y2,y3)
665 SBOX4I(y0,y1,y2,y3, x0,x1,x2,x3)
672 SBOX3I(x0,x1,x2,x3, y0,y1,y2,y3)
679 SBOX2I(y0,y1,y2,y3, x0,x1,x2,x3)
686 SBOX1I(x0,x1,x2,x3, y0,y1,y2,y3)
693 SBOX0I(y0,y1,y2,y3, x0,x1,x2,x3)