1 C arm/neon/sha3-permute.asm
4 Copyright (C) 2013 Niels Möller
6 This file is part of GNU Nettle.
8 GNU Nettle is free software: you can redistribute it and/or
9 modify it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free
18 Software Foundation; either version 2 of the License, or (at your
19 option) any later version.
21 or both in parallel, as here.
23 GNU Nettle is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received copies of the GNU General Public License and
29 the GNU Lesser General Public License along with this program. If
30 not, see http://www.gnu.org/licenses/.
33 .file "sha3-permute.asm"
81 C ROL(DST, SRC, COUNT)
82 C Must have SRC != DST
C m4 macro body; $1, $2 and $3 are DST, SRC and COUNT. Only the first
C instruction is visible in this excerpt. It writes
C DST = SRC >> (64 - COUNT) in each 64-bit lane, i.e. the high bits of
C SRC that wrap around to the low end in a left rotate by COUNT. The
C shift amount is computed by m4 eval at macro-expansion time, so COUNT
C has to be a literal number.
C NOTE(review): presumably a vsli of SRC by COUNT into DST follows (the
C function body mentions "ROL macro (and vsli)"); that would read SRC
C after DST has been written, which is why SRC != DST is required --
C confirm against the full macro.
84 vshr.u64 $1, $2, #eval(64-$3)
87 C sha3_permute(struct sha3_ctx *ctx)
C Round constant table: 24 64-bit words, read sequentially through RC
C by the post-incrementing vld1.64 load in the function below
C (presumably one word per round).
C The values match the Keccak-f[1600] iota round constants RC[0..23]
C from FIPS 202. Two values legitimately occur twice
C (0x8000000080008081 and 0x0000000080000001); those are not
C copy/paste errors.
C NOTE(review): the load uses a :64 alignment qualifier, so this table
C has to be 8-byte aligned. The label and the alignment directive are
C not visible in this excerpt -- confirm.
92 .quad 0x0000000000000001
93 .quad 0x0000000000008082
94 .quad 0x800000000000808A
95 .quad 0x8000000080008000
96 .quad 0x000000000000808B
97 .quad 0x0000000080000001
98 .quad 0x8000000080008081
99 .quad 0x8000000000008009
100 .quad 0x000000000000008A
101 .quad 0x0000000000000088
102 .quad 0x0000000080008009
103 .quad 0x000000008000000A
104 .quad 0x000000008000808B
105 .quad 0x800000000000008B
106 .quad 0x8000000000008089
107 .quad 0x8000000000008003
108 .quad 0x8000000000008002
109 .quad 0x8000000000000080
110 .quad 0x000000000000800A
111 .quad 0x800000008000000A
112 .quad 0x8000000080008081
113 .quad 0x8000000000008080
114 .quad 0x0000000080000001
115 .quad 0x8000000080008008
C nettle_sha3_permute: permutes, in place, the state that CTX points to.
C The 64-bit state words are loaded into the A* registers, transformed
C by the round code below, and stored back through CTX in the same
C order they were loaded.
C NOTE(review): this listing shows only part of the function. The
C register defines, the loop label and branch, the setup of COUNT and
C RC, any save/restore of callee-saved NEON registers, the rewind of
C CTX between the loads and the stores, and the loads/stores of some
C words (e.g. A5, which is read below) are not visible here -- confirm
C against the full file before changing anything.
117 PROLOGUE(nettle_sha3_permute)
C Load the state. Every "!" form advances CTX past the words just
C read; the last vldm leaves CTX unchanged.
121 vldm CTX!, {A1,A2,A3,A4}
123 vldm CTX!, {A6,A7,A8,A9}
124 vld1.64 {A10}, [CTX]!
125 vldm CTX!, {A11,A12,A13,A14}
126 vld1.64 {A15}, [CTX]!
127 vldm CTX!, {A16,A17,A18,A19}
128 vld1.64 {A20}, [CTX]!
129 vldm CTX, {A21,A22,A23,A24}
C Column parities. QREG is defined outside this excerpt; presumably it
C names the q register that contains the given d register, so each veor
C below would also process the neighbouring d register -- TODO confirm.
C T0 = A5 ^ A15 is a partial parity; the remaining terms are presumably
C folded in by lines not shown.
137 veor QREG(T0), QREG(A5), QREG(A15)
C C1 = A1 ^ A6 ^ A11 ^ A16 ^ A21, accumulated one row at a time.
140 veor QREG(C1), QREG(A1), QREG(A6)
141 veor QREG(C1), QREG(C1), QREG(A11)
142 veor QREG(C1), QREG(C1), QREG(A16)
143 veor QREG(C1), QREG(C1), QREG(A21)
C C3 = A3 ^ A8 ^ A13 ^ A18 ^ A23, same pattern.
145 veor QREG(C3), QREG(A3), QREG(A8)
146 veor QREG(C3), QREG(C3), QREG(A13)
147 veor QREG(C3), QREG(C3), QREG(A18)
148 veor QREG(C3), QREG(C3), QREG(A23)
150 C D0 = C4 ^ (C1 <<< 1)
151 C NOTE: Using ROL macro (and vsli) is slightly slower.
C Fold T0 into A5 and A15. T0 is expected to hold the D value(s) for
C these words here; the instructions that compute it are not visible.
158 veor QREG(A5), QREG(A5), QREG(T0)
159 veor QREG(A15), QREG(A15), QREG(T0)
161 C D1 = C0 ^ (C2 <<< 1)
162 C D2 = C1 ^ (C3 <<< 1)
C Fold T0 (presumably the D values named above, computed by lines not
C shown) into every row of this column group.
167 veor QREG(A1), QREG(A1), QREG(T0)
168 veor QREG(A6), QREG(A6), QREG(T0)
169 veor QREG(A11), QREG(A11), QREG(T0)
170 veor QREG(A16), QREG(A16), QREG(T0)
171 veor QREG(A21), QREG(A21), QREG(T0)
173 C D3 = C2 ^ (C4 <<< 1)
174 C D4 = C3 ^ (C0 <<< 1)
C Same fold for the A3/A8/A13/A18/A23 group, with T0 recomputed in
C between by lines not shown.
179 veor QREG(A3), QREG(A3), QREG(T0)
180 veor QREG(A8), QREG(A8), QREG(T0)
181 veor QREG(A13), QREG(A13), QREG(T0)
182 veor QREG(A18), QREG(A18), QREG(T0)
183 veor QREG(A23), QREG(A23), QREG(T0)
209 C New A10 value left in T0
C Fetch the next 64-bit round constant into C0 and advance RC by 8
C bytes. The :64 qualifier asserts that RC is 8-byte aligned.
218 vld1.64 {C0}, [RC :64]!
C For each of the five rows: A(1+5k) ^= C1 and A(3+5k) ^= C3. C1 and C3
C no longer hold the column parities at this point; presumably they are
C recomputed per row by lines not visible in this excerpt.
219 veor QREG(A1), QREG(A1), QREG(C1)
220 veor QREG(A3), QREG(A3), QREG(C3)
230 veor QREG(A6), QREG(A6), QREG(C1)
231 veor QREG(A8), QREG(A8), QREG(C3)
240 veor QREG(A11), QREG(A11), QREG(C1)
241 veor QREG(A13), QREG(A13), QREG(C3)
250 veor QREG(A16), QREG(A16), QREG(C1)
251 veor QREG(A18), QREG(A18), QREG(C3)
C Decrement the round counter and set the flags early. The NEON veor
C instructions that follow do not alter the condition flags, so a
C conditional branch placed after them (not visible in this excerpt)
C can still test this result.
259 subs COUNT, COUNT, #1
261 veor QREG(A21), QREG(A21), QREG(C1)
262 veor QREG(A23), QREG(A23), QREG(C3)
C Write the state back in the same order it was loaded. This relies on
C CTX pointing at the same position as before the first visible load;
C the adjustment that restores it is not shown here.
267 vstm CTX!, {A1,A2,A3,A4}
269 vstm CTX!, {A6,A7,A8,A9}
270 vst1.64 {A10}, [CTX]!
271 vstm CTX!, {A11,A12,A13,A14}
272 vst1.64 {A15}, [CTX]!
273 vstm CTX!, {A16,A17,A18,A19}
274 vst1.64 {A20}, [CTX]!
275 vstm CTX, {A21,A22,A23,A24}
279 EPILOGUE(nettle_sha3_permute)