1 C x86_64/sha1-compress.asm
4 Copyright (C) 2004, 2008, 2013 Niels Möller
6 This file is part of GNU Nettle.
8 GNU Nettle is free software: you can redistribute it and/or
9 modify it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free
18 Software Foundation; either version 2 of the License, or (at your
19 option) any later version.
21 or both in parallel, as here.
23 GNU Nettle is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received copies of the GNU General Public License and
29 the GNU Lesser General Public License along with this program. If
30 not, see http://www.gnu.org/licenses/.
33 C Register usage. KVALUE and INPUT share a register.
C SA-SE are the registers passed as the a-e arguments of the ROUND_* macros;
C the invocations rotate which register plays which role from round to round.
34 define(<SA>,<%eax>)dnl C round variable (argument a of round 0)
35 define(<SB>,<%r8d>)dnl C round variable (argument b of round 0)
36 define(<SC>,<%ecx>)dnl C round variable (argument c of round 0)
37 define(<SD>,<%edx>)dnl C round variable (argument d of round 0)
38 define(<SE>,<%r9d>)dnl C round variable (argument e of round 0)
39 define(<DATA>,<%rsp>)dnl C base of the W array kept on the stack
40 define(<T1>,<%r10d>)dnl C temporary; holds the expanded word in ROUND_F1/F2/F3
41 define(<T2>,<%r11d>)dnl C second temporary; NOTE(review): its uses are not visible in this view
42 define(<KVALUE>, <%esi>)dnl C low 32 bits of INPUT's register, so writing KVALUE destroys the INPUT pointer
45 define(<STATE>,<%rdi>)dnl C state pointer; %rdi is the first integer argument register in the System V AMD64 ABI
46 define(<INPUT>,<%rsi>)dnl C input pointer; %rsi is the second integer argument register in the System V AMD64 ABI
C The four SHA-1 round constants, written as immediate operands.
C The dollar sign is quoted separately as <$> so that m4 does not read the
C leading "$0" of the literal as a reference to macro argument 0.
49 define(<K1VALUE>, <<$>0x5A827999>)dnl C Rounds 0-19
50 define(<K2VALUE>, <<$>0x6ED9EBA1>)dnl C Rounds 20-39
51 define(<K3VALUE>, <<$>0x8F1BBCDC>)dnl C Rounds 40-59
52 define(<K4VALUE>, <<$>0xCA62C1D6>)dnl C Rounds 60-79
54 C Reads the input into register, byteswaps it, and stores it in the DATA array.
55 C SWAP(index, register)
C   index    - word index, used for both the INPUT load and the DATA store
C   register - 32-bit scratch register; it is overwritten
C NOTE(review): OFFSET is defined outside this view; presumably it turns a
C word index into a byte offset - confirm. The byteswap instruction and the
C macro's define header and closing quote are not among the lines visible here.
57 movl OFFSET($1)(INPUT), $2
59 movl $2, OFFSET($1) (DATA)
64 C f1(x,y,z) = z ^ (x & (y ^ z))
65 C f2(x,y,z) = x ^ y ^ z
66 C f3(x,y,z) = (x & y) | (z & (x | y))
67 C           = (x & (y ^ z)) + (y & z)
70 C This additive form of f3 was suggested by George Spelvin. The two terms never
71 C have a bit set in common, so + acts as |, and the terms can be added into the result one at a time, saving one temporary.
73 C expand(i) is the expansion function
75 C W[i] = (W[i - 16] ^ W[i - 14] ^ W[i - 8] ^ W[i - 3]) <<< 1
77 C where W[i] is stored in DATA[i mod 16].
79 C The form of one sha1 round is
81 C a' = e + (a <<< 5) + f( b, c, d ) + k + w;
87 C where <<< denotes rotation. We permute our variables, so that we instead get
90 C e += (a <<< 5) + f( b, c, d ) + k + w;
93 dnl ROUND_F1(a, b, c, d, e, i)
dnl The lines visible here are the message expansion for round i, done in
dnl place on the 16-word DATA array:
dnl   T1 = DATA[i mod 16] ^ DATA[(i+2) mod 16] ^ DATA[(i+8) mod 16] ^ DATA[(i+13) mod 16]
dnl which are the slots holding W[i-16], W[i-14], W[i-8] and W[i-3].
dnl T1 is then stored back to DATA[i mod 16]. All indices are folded by eval
dnl at m4 time, so each expansion yields fixed offsets.
dnl NOTE(review): the <<< 1 rotation of expand(i), the f1/add sequence and the
dnl define header/closer are not among the lines visible in this view.
95 movl OFFSET(eval($6 % 16)) (DATA), T1
96 xorl OFFSET(eval(($6 + 2) % 16)) (DATA), T1
97 xorl OFFSET(eval(($6 + 8) % 16)) (DATA), T1
98 xorl OFFSET(eval(($6 + 13) % 16)) (DATA), T1
100 movl T1, OFFSET(eval($6 % 16)) (DATA)
114 dnl ROUND_F1_NOEXP(a, b, c, d, e, i)
dnl Variant of the F1 round without message expansion: the visible line adds
dnl the stored word DATA[i] straight into e. Here i is used unreduced (no
dnl "% 16"), and the invocations in this file pass i = 0..15 only.
dnl NOTE(review): the rest of the round body and the closing quote of this
dnl define are not among the lines visible in this view.
115 define(<ROUND_F1_NOEXP>, <
120 addl OFFSET($6) (DATA), $5
129 dnl ROUND_F2(a, b, c, d, e, i)
dnl The lines visible here are the message expansion for round i, done in
dnl place on the 16-word DATA array:
dnl   T1 = DATA[i mod 16] ^ DATA[(i+2) mod 16] ^ DATA[(i+8) mod 16] ^ DATA[(i+13) mod 16]
dnl which are the slots holding W[i-16], W[i-14], W[i-8] and W[i-3].
dnl T1 is then stored back to DATA[i mod 16].
dnl This macro is invoked for rounds 20-39 and again for rounds 60-79.
dnl NOTE(review): the <<< 1 rotation of expand(i), the f2/add sequence and the
dnl define header/closer are not among the lines visible in this view.
131 movl OFFSET(eval($6 % 16)) (DATA), T1
132 xorl OFFSET(eval(($6 + 2) % 16)) (DATA), T1
133 xorl OFFSET(eval(($6 + 8) % 16)) (DATA), T1
134 xorl OFFSET(eval(($6 + 13) % 16)) (DATA), T1
136 movl T1, OFFSET(eval($6 % 16)) (DATA)
149 dnl ROUND_F3(a, b, c, d, e, i)
dnl The lines visible here are the message expansion for round i, done in
dnl place on the 16-word DATA array:
dnl   T1 = DATA[i mod 16] ^ DATA[(i+2) mod 16] ^ DATA[(i+8) mod 16] ^ DATA[(i+13) mod 16]
dnl which are the slots holding W[i-16], W[i-14], W[i-8] and W[i-3].
dnl T1 is then stored back to DATA[i mod 16].
dnl This macro is invoked for rounds 40-59.
dnl NOTE(review): the <<< 1 rotation of expand(i), the f3/add sequence and the
dnl define header/closer are not among the lines visible in this view.
151 movl OFFSET(eval($6 % 16)) (DATA), T1
152 xorl OFFSET(eval(($6 + 2) % 16)) (DATA), T1
153 xorl OFFSET(eval(($6 + 8) % 16)) (DATA), T1
154 xorl OFFSET(eval(($6 + 13) % 16)) (DATA), T1
156 movl T1, OFFSET(eval($6 % 16)) (DATA)
172 .file "sha1-compress.asm"
174 C _nettle_sha1_compress(uint32_t *state, uint8_t *input)
C Copies 16 input words into a 64-byte W array on the stack and then runs the
C 80 round-macro invocations below over the five round registers SA-SE.
C NOTE(review): the instructions that save registers, load KVALUE with
C K1VALUE..K4VALUE, load and update the state vector, release the stack area
C and return are not among the lines visible in this view.
178 PROLOGUE(_nettle_sha1_compress)
179 C save all registers that need to be saved
182 sub $64, %rsp C %rsp = W
184 C Load and byteswap data
C Sixteen SWAP invocations, word indices 0-15; SA-SD serve only as scratch
C registers here, reused in turn.
185 SWAP( 0, SA) SWAP( 1, SB) SWAP( 2, SC) SWAP( 3, SD)
186 SWAP( 4, SA) SWAP( 5, SB) SWAP( 6, SC) SWAP( 7, SD)
187 SWAP( 8, SA) SWAP( 9, SB) SWAP(10, SC) SWAP(11, SD)
188 SWAP(12, SA) SWAP(13, SB) SWAP(14, SC) SWAP(15, SD)
190 C Load the state vector
C Rounds 0-15: F1 rounds that use the stored words directly (no expansion).
C From one round to the next the argument list is rotated by one position
C (SA,SB,SC,SD,SE -> SE,SA,SB,SC,SD -> ...), so the registers change roles
C instead of values being moved; the order repeats every five rounds.
198 ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 0)
199 ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 1)
200 ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 2)
201 ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 3)
202 ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 4)
204 ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 5)
205 ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 6)
206 ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 7)
207 ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 8)
208 ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 9)
210 ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 10)
211 ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 11)
212 ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 12)
213 ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 13)
214 ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 14)
216 ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 15)
C Rounds 16-19: F1 rounds with message expansion.
217 ROUND_F1(SE, SA, SB, SC, SD, 16)
218 ROUND_F1(SD, SE, SA, SB, SC, 17)
219 ROUND_F1(SC, SD, SE, SA, SB, 18)
220 ROUND_F1(SB, SC, SD, SE, SA, 19)
C Rounds 20-39: F2 rounds.
223 ROUND_F2(SA, SB, SC, SD, SE, 20)
224 ROUND_F2(SE, SA, SB, SC, SD, 21)
225 ROUND_F2(SD, SE, SA, SB, SC, 22)
226 ROUND_F2(SC, SD, SE, SA, SB, 23)
227 ROUND_F2(SB, SC, SD, SE, SA, 24)
229 ROUND_F2(SA, SB, SC, SD, SE, 25)
230 ROUND_F2(SE, SA, SB, SC, SD, 26)
231 ROUND_F2(SD, SE, SA, SB, SC, 27)
232 ROUND_F2(SC, SD, SE, SA, SB, 28)
233 ROUND_F2(SB, SC, SD, SE, SA, 29)
235 ROUND_F2(SA, SB, SC, SD, SE, 30)
236 ROUND_F2(SE, SA, SB, SC, SD, 31)
237 ROUND_F2(SD, SE, SA, SB, SC, 32)
238 ROUND_F2(SC, SD, SE, SA, SB, 33)
239 ROUND_F2(SB, SC, SD, SE, SA, 34)
241 ROUND_F2(SA, SB, SC, SD, SE, 35)
242 ROUND_F2(SE, SA, SB, SC, SD, 36)
243 ROUND_F2(SD, SE, SA, SB, SC, 37)
244 ROUND_F2(SC, SD, SE, SA, SB, 38)
245 ROUND_F2(SB, SC, SD, SE, SA, 39)
C Rounds 40-59: F3 rounds.
248 ROUND_F3(SA, SB, SC, SD, SE, 40)
249 ROUND_F3(SE, SA, SB, SC, SD, 41)
250 ROUND_F3(SD, SE, SA, SB, SC, 42)
251 ROUND_F3(SC, SD, SE, SA, SB, 43)
252 ROUND_F3(SB, SC, SD, SE, SA, 44)
254 ROUND_F3(SA, SB, SC, SD, SE, 45)
255 ROUND_F3(SE, SA, SB, SC, SD, 46)
256 ROUND_F3(SD, SE, SA, SB, SC, 47)
257 ROUND_F3(SC, SD, SE, SA, SB, 48)
258 ROUND_F3(SB, SC, SD, SE, SA, 49)
260 ROUND_F3(SA, SB, SC, SD, SE, 50)
261 ROUND_F3(SE, SA, SB, SC, SD, 51)
262 ROUND_F3(SD, SE, SA, SB, SC, 52)
263 ROUND_F3(SC, SD, SE, SA, SB, 53)
264 ROUND_F3(SB, SC, SD, SE, SA, 54)
266 ROUND_F3(SA, SB, SC, SD, SE, 55)
267 ROUND_F3(SE, SA, SB, SC, SD, 56)
268 ROUND_F3(SD, SE, SA, SB, SC, 57)
269 ROUND_F3(SC, SD, SE, SA, SB, 58)
270 ROUND_F3(SB, SC, SD, SE, SA, 59)
C Rounds 60-79: the F2 round macro is reused for the last twenty rounds.
273 ROUND_F2(SA, SB, SC, SD, SE, 60)
274 ROUND_F2(SE, SA, SB, SC, SD, 61)
275 ROUND_F2(SD, SE, SA, SB, SC, 62)
276 ROUND_F2(SC, SD, SE, SA, SB, 63)
277 ROUND_F2(SB, SC, SD, SE, SA, 64)
279 ROUND_F2(SA, SB, SC, SD, SE, 65)
280 ROUND_F2(SE, SA, SB, SC, SD, 66)
281 ROUND_F2(SD, SE, SA, SB, SC, 67)
282 ROUND_F2(SC, SD, SE, SA, SB, 68)
283 ROUND_F2(SB, SC, SD, SE, SA, 69)
285 ROUND_F2(SA, SB, SC, SD, SE, 70)
286 ROUND_F2(SE, SA, SB, SC, SD, 71)
287 ROUND_F2(SD, SE, SA, SB, SC, 72)
288 ROUND_F2(SC, SD, SE, SA, SB, 73)
289 ROUND_F2(SB, SC, SD, SE, SA, 74)
291 ROUND_F2(SA, SB, SC, SD, SE, 75)
292 ROUND_F2(SE, SA, SB, SC, SD, 76)
293 ROUND_F2(SD, SE, SA, SB, SC, 77)
294 ROUND_F2(SC, SD, SE, SA, SB, 78)
295 ROUND_F2(SB, SC, SD, SE, SA, 79)
297 C Update the state vector
307 EPILOGUE(_nettle_sha1_compress)