1 C nettle, low-level cryptographics library
3 C Copyright (C) 2010, Niels Möller
5 C The nettle library is free software; you can redistribute it and/or modify
6 C it under the terms of the GNU Lesser General Public License as published by
7 C the Free Software Foundation; either version 2.1 of the License, or (at your
8 C option) any later version.
10 C The nettle library is distributed in the hope that it will be useful, but
11 C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 C License for more details.
15 C You should have received a copy of the GNU Lesser General Public License
16 C along with the nettle library; see the file COPYING.LIB. If not, write to
17 C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 define(<DST>, <%rax>) C Originally in %rdi; kept in %rax, which is also the return value
29 define(<S1>, <%rdi>) C Overlaps with CNT, which also lives in %rdi
31 define(<USE_SSE2>, <no>) C Build-time switch; the SSE2 sections are selected only when this is yes
37 C memxor(uint8_t *dst, const uint8_t *src, size_t n)
48 C memxor3(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n)
54 C %cl is needed for the shift count, so move N out of the way
58 C Get number of unaligned bytes at the end
59 C %rdi is used as CNT, %rax as DST and as return value
70 C FIXME: Instead of this loop, could try cmov-style conditional
71 C stores, as a sequence of one 8-bit, one 16-bit and one 32-bit
72 C operation. (Except that cmov has no 8-bit form and can only write
73 C to a register, so the stores themselves need a conditional).
77 movb (AP, N), LREG(TMP) C Load the byte at AP + N into the byte register LREG(TMP)
78 xorb (BP, N), LREG(TMP) C XOR in the byte at BP + N
79 movb LREG(TMP), (DST, N) C Store the resulting byte at DST + N
84 ifelse(USE_SSE2, yes, <
88 C Check for the case that AP and BP have the same alignment,
89 C but different from DST.
102 C Unrolling, with aligned values alternating in S0 and S1
150 C FIXME: Handle the case N == 16 specially,
151 C like in the non-shifted case?
165 C Next destination word is -8(DST, N)
166 C Setup for unrolling
191 ja .Lword_loop C Not zero and no carry: ja is taken only when ZF = 0 and CF = 0
194 C Final operation is word aligned
204 C ENTRY might have been 3 args, too, but it doesn't matter for the exit
212 movb (AP, N), LREG(TMP) C Load the byte at AP + N into the byte register LREG(TMP)
213 xorb (BP, N), LREG(TMP) C XOR in the byte at BP + N
214 movb LREG(TMP), (DST, N) C Store the resulting byte at DST + N
220 C ENTRY might have been 3 args, too, but it doesn't matter for the exit
224 ifelse(USE_SSE2, yes, <
238 movdqu (AP, N), %xmm0 C Unaligned 16-byte load from AP + N
239 movdqu (BP, N), %xmm1 C Unaligned 16-byte load from BP + N
241 movdqa %xmm1, (DST, N) C Aligned 16-byte store to DST + N; movdqa requires 16-byte alignment
246 C FIXME: See if we can do a full word first, before the
247 C byte-wise final loop.
250 C Final operation is aligned
255 C ENTRY might have been 3 args, too, but it doesn't matter for the exit