1 C nettle, low-level cryptographics library
3 C Copyright (C) 2011 Niels Möller
5 C The nettle library is free software; you can redistribute it and/or modify
6 C it under the terms of the GNU Lesser General Public License as published by
7 C the Free Software Foundation; either version 2.1 of the License, or (at your
8 C option) any later version.
10 C The nettle library is distributed in the hope that it will be useful, but
11 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 C License for more details.
15 C You should have received a copy of the GNU Lesser General Public License
16 C along with the nettle library; see the file COPYING.LIB. If not, write to
17 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
20 include_src(<x86_64/serpent.m4>)
24 C Single block serpent state, two copies
35 C Quadruple block serpent state, two copies
46 define(<MINUS1>, <%xmm8>) C alias for xmm8; the function body fills it with all-one bits via pcmpeqd MINUS1, MINUS1
48 define(<T1>, <%xmm10>) C T1-T3 alias xmm10-xmm12; their uses are not visible here, presumably scratch for the wide S-box macros -- TODO confirm
49 define(<T2>, <%xmm11>)
50 define(<T3>, <%xmm12>)
59 define(<TMP32>, <%r14d>) C 32-bit view of r14, which is callee-saved in both the SysV and Win64 ABIs -- NOTE(review): confirm the prologue saves it
61 C SBOX macros. Inputs $1 - $4 (destroyed), outputs $5 - $8
519 .file "serpent-decrypt.asm"
521 C serpent_decrypt(struct serpent_context *ctx,
522 C unsigned length, uint8_t *dst,
523 C const uint8_t *src)
526 PROLOGUE(nettle_serpent_decrypt)
527 C save all registers that need to be saved
543 pcmpeqd MINUS1, MINUS1
547 movups 16(SRC, N), X1
548 movups 32(SRC, N), X2
549 movups 48(SRC, N), X3
551 WTRANSPOSE(X0,X1,X2,X3)
555 C FIXME: CNT known, no index register needed
556 WKEYXOR(128, X0,X1,X2,X3)
565 WSBOX7I(X0,X1,X2,X3, Y0,Y1,Y2,Y3)
566 WKEYXOR(112, Y0,Y1,Y2,Y3)
569 WSBOX6I(Y0,Y1,Y2,Y3, X0,X1,X2,X3)
570 WKEYXOR(96, X0,X1,X2,X3)
573 WSBOX5I(X0,X1,X2,X3, Y0,Y1,Y2,Y3)
574 WKEYXOR(80, Y0,Y1,Y2,Y3)
577 WSBOX4I(Y0,Y1,Y2,Y3, X0,X1,X2,X3)
578 WKEYXOR(64, X0,X1,X2,X3)
581 WSBOX3I(X0,X1,X2,X3, Y0,Y1,Y2,Y3)
582 WKEYXOR(48, Y0,Y1,Y2,Y3)
585 WSBOX2I(Y0,Y1,Y2,Y3, X0,X1,X2,X3)
586 WKEYXOR(32, X0,X1,X2,X3)
589 WSBOX1I(X0,X1,X2,X3, Y0,Y1,Y2,Y3)
590 WKEYXOR(16, Y0,Y1,Y2,Y3)
593 WSBOX0I(Y0,Y1,Y2,Y3, X0,X1,X2,X3)
594 WKEYXOR(, X0,X1,X2,X3)
599 WTRANSPOSE(X0,X1,X2,X3)
602 movups X1, 16(DST, N)
603 movups X2, 32(DST, N)
604 movups X3, 48(DST, N)
606 C FIXME: Adjust N, so we can use just jnc without an extra cmp.
631 SBOX7I(x0,x1,x2,x3, y0,y1,y2,y3)
632 xor 112(CTX, CNT), y0
633 xor 116(CTX, CNT), y1
634 xor 120(CTX, CNT), y2
635 xor 124(CTX, CNT), y3
638 SBOX6I(y0,y1,y2,y3, x0,x1,x2,x3)
640 xor 100(CTX, CNT), x1
641 xor 104(CTX, CNT), x2
642 xor 108(CTX, CNT), x3
645 SBOX5I(x0,x1,x2,x3, y0,y1,y2,y3)
652 SBOX4I(y0,y1,y2,y3, x0,x1,x2,x3)
659 SBOX3I(x0,x1,x2,x3, y0,y1,y2,y3)
666 SBOX2I(y0,y1,y2,y3, x0,x1,x2,x3)
673 SBOX1I(x0,x1,x2,x3, y0,y1,y2,y3)
680 SBOX0I(y0,y1,y2,y3, x0,x1,x2,x3)