/* twofish-arm.S  -  ARM assembly implementation of Twofish cipher
 *
 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#if defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS

/* structure of TWOFISH_context: */
#define s0 0
#define s1 ((s0) + 4 * 256)
#define s2 ((s1) + 4 * 256)
#define s3 ((s2) + 4 * 256)
#define w  ((s3) + 4 * 256)
#define k  ((w) + 4 * 8)
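
/*
 * For reference, the offsets above correspond to a context layout along
 * these lines (a C sketch inferred from the offset arithmetic; the
 * authoritative definition is the TWOFISH_context in libgcrypt's
 * twofish.c):
 *
 *   #include <stdint.h>
 *   typedef struct {
 *     uint32_t s[4][256];   // s0..s3: key-dependent 8x32-bit S-box tables
 *     uint32_t w[8];        // input/output whitening subkeys
 *     uint32_t k[32];       // round subkeys, two per round
 *   } TWOFISH_context;
 *
 * Every table entry is a 32-bit word, hence the 4 * 256 strides and the
 * word-scaled byte indices used by the round macros below.
 */
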
#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
        ldrb rout, [rsrc, #((offs) + 0)]; \
        ldrb rtmp, [rsrc, #((offs) + 1)]; \
        orr rout, rout, rtmp, lsl #8; \
        ldrb rtmp, [rsrc, #((offs) + 2)]; \
        orr rout, rout, rtmp, lsl #16; \
        ldrb rtmp, [rsrc, #((offs) + 3)]; \
        orr rout, rout, rtmp, lsl #24;

#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
        mov rtmp0, rin, lsr #8; \
        strb rin, [rdst, #((offs) + 0)]; \
        mov rtmp1, rin, lsr #16; \
        strb rtmp0, [rdst, #((offs) + 1)]; \
        mov rtmp0, rin, lsr #24; \
        strb rtmp1, [rdst, #((offs) + 2)]; \
        strb rtmp0, [rdst, #((offs) + 3)];
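
/*
 * In C terms the two helpers above assemble and split a little-endian
 * 32-bit word one byte at a time (sketch, not used by the build):
 *
 *   #include <stdint.h>
 *   static uint32_t load_le32(const unsigned char *p)
 *   {
 *     return (uint32_t)p[0] | ((uint32_t)p[1] << 8)
 *            | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
 *   }
 *   static void store_le32(unsigned char *p, uint32_t v)
 *   {
 *     p[0] = v; p[1] = v >> 8; p[2] = v >> 16; p[3] = v >> 24;
 *   }
 *
 * Byte accesses cannot fault on unaligned addresses, which is why these
 * serve as the fallback path when a block pointer is not word-aligned.
 */
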
#ifdef __ARMEB__
/* bswap on big-endian */
#define host_to_le(reg) \
        rev reg, reg;
#define le_to_host(reg) \
        rev reg, reg;
#else
/* nop on little-endian */
#define host_to_le(reg) /*_*/
#define le_to_host(reg) /*_*/
#endif

#define ldr_input_aligned_le(rin, a, b, c, d) \
        ldr a, [rin, #0]; \
        ldr b, [rin, #4]; \
        ldr c, [rin, #8]; \
        ldr d, [rin, #12]; \
        le_to_host(a); \
        le_to_host(b); \
        le_to_host(c); \
        le_to_host(d);

#define str_output_aligned_le(rout, a, b, c, d) \
        host_to_le(a); \
        host_to_le(b); \
        host_to_le(c); \
        host_to_le(d); \
        str a, [rout, #0]; \
        str b, [rout, #4]; \
        str c, [rout, #8]; \
        str d, [rout, #12];

#ifdef __ARM_FEATURE_UNALIGNED
/* unaligned word reads/writes allowed */
#define ldr_input_le(rin, ra, rb, rc, rd, rtmp) \
        ldr_input_aligned_le(rin, ra, rb, rc, rd)

#define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
        str_output_aligned_le(rout, ra, rb, rc, rd)
#else
/* need to handle unaligned reads/writes by byte reads */
#define ldr_input_le(rin, ra, rb, rc, rd, rtmp0) \
        tst rin, #3; \
        beq 1f; \
        ldr_unaligned_le(ra, rin, 0, rtmp0); \
        ldr_unaligned_le(rb, rin, 4, rtmp0); \
        ldr_unaligned_le(rc, rin, 8, rtmp0); \
        ldr_unaligned_le(rd, rin, 12, rtmp0); \
        b 2f; \
1:; \
        ldr_input_aligned_le(rin, ra, rb, rc, rd); \
2:;

#define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
        tst rout, #3; \
        beq 1f; \
        str_unaligned_le(ra, rout, 0, rtmp0, rtmp1); \
        str_unaligned_le(rb, rout, 4, rtmp0, rtmp1); \
        str_unaligned_le(rc, rout, 8, rtmp0, rtmp1); \
        str_unaligned_le(rd, rout, 12, rtmp0, rtmp1); \
        b 2f; \
1:; \
        str_output_aligned_le(rout, ra, rb, rc, rd); \
2:;
#endif
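
/*
 * The dispatch above amounts to this C-style check (sketch; the helper
 * names are hypothetical):
 *
 *   if (((uintptr_t)p & 3) == 0)
 *     words_path(p);     // pointer is 4-byte aligned: plain word accesses
 *   else
 *     bytes_path(p);     // otherwise: byte-by-byte accesses
 *
 * When the compiler advertises __ARM_FEATURE_UNALIGNED the check is
 * dropped entirely and word accesses are used unconditionally.
 */
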
/**********************************************************************
 **********************************************************************/

#define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \
        and RT0, RMASK, b, lsr#(8 - 2); \
        and RY, RMASK, b, lsr#(16 - 2); \
        add RT0, RT0, #(s2 - s1); \
        and RT1, RMASK, b, lsr#(24 - 2); \
        ldr RY, [CTXs3, RY]; \
        and RT2, RMASK, b, lsl#(2); \
        ldr RT0, [CTXs1, RT0]; \
        and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); \
        ldr RT1, [CTXs0, RT1]; \
        and RX, RMASK, a, lsr#(8 - 2 + (adj_a)); \
        ldr RT2, [CTXs1, RT2]; \
        add RT3, RT3, #(s2 - s1); \
        ldr RX, [CTXs1, RX]; \
        ror_a(a); \
        \
        eor RY, RY, RT0; \
        ldr RT3, [CTXs1, RT3]; \
        and RT0, RMASK, a, lsl#(2); \
        eor RY, RY, RT1; \
        and RT1, RMASK, a, lsr#(24 - 2); \
        eor RY, RY, RT2; \
        ldr RT0, [CTXs0, RT0]; \
        eor RX, RX, RT3; \
        ldr RT1, [CTXs3, RT1]; \
        eor RX, RX, RT0; \
        \
        ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
        eor RX, RX, RT1; \
        ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
        \
        add RT0, RX, RY, lsl #1; \
        add RX, RX, RY; \
        add RT0, RT0, RT3; \
        add RX, RX, RT2; \
        eor rd, RT0, rd, ror #31; \
        eor rc, rc, RX;
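
/*
 * What one encrypt_round() computes, as a C sketch.  This is an editorial
 * illustration, not part of the build; s0..s3 stand for the four
 * key-dependent tables in the context, k for the round subkeys, and
 * rol32/ror32 for plain 32-bit rotates:
 *
 *   static uint32_t rol32(uint32_t x, int r) { return (x << r) | (x >> (32 - r)); }
 *   static uint32_t ror32(uint32_t x, int r) { return (x >> r) | (x << (32 - r)); }
 *
 *   // round n: update the (c, d) half with the F function of (a, b)
 *   static void round_c(const uint32_t s0[256], const uint32_t s1[256],
 *                       const uint32_t s2[256], const uint32_t s3[256],
 *                       const uint32_t *k, unsigned n,
 *                       uint32_t a, uint32_t b, uint32_t *c, uint32_t *d)
 *   {
 *     uint32_t x = s0[a & 0xff] ^ s1[(a >> 8) & 0xff]
 *                ^ s2[(a >> 16) & 0xff] ^ s3[a >> 24];        // g(a)
 *     uint32_t y = s1[b & 0xff] ^ s2[(b >> 8) & 0xff]
 *                ^ s3[(b >> 16) & 0xff] ^ s0[b >> 24];        // g(rol32(b, 8))
 *     *c = ror32(*c ^ (x + y + k[2 * n]), 1);
 *     *d = rol32(*d, 1) ^ (x + 2 * y + k[2 * n + 1]);
 *   }
 *
 * RMASK == (0xff << 2) lets the "and ..., lsr #(8*i - 2)" patterns produce
 * a byte value already multiplied by 4, i.e. a ready byte offset into the
 * word-sized table entries.  Note that the assembly defers the ROR-by-1 of
 * c: it is applied by the next round through ror_a/adj_a, and the last
 * cycle settles the final pending rotate itself.
 */
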
#define dummy(x) /*_*/

#define ror1(r) \
        mov r, r, ror #1;

#define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \
        and RT3, RMASK, b, lsl#(2 - (adj_b)); \
        and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); \
        ror_b(b); \
        and RT2, RMASK, a, lsl#(2); \
        and RT0, RMASK, a, lsr#(8 - 2); \
        \
        ldr RY, [CTXs1, RT3]; \
        add RT1, RT1, #(s2 - s1); \
        ldr RX, [CTXs0, RT2]; \
        and RT3, RMASK, b, lsr#(16 - 2); \
        ldr RT1, [CTXs1, RT1]; \
        and RT2, RMASK, a, lsr#(16 - 2); \
        ldr RT0, [CTXs1, RT0]; \
        \
        add RT2, RT2, #(s2 - s1); \
        ldr RT3, [CTXs3, RT3]; \
        eor RY, RY, RT1; \
        \
        and RT1, RMASK, b, lsr#(24 - 2); \
        eor RX, RX, RT0; \
        ldr RT2, [CTXs1, RT2]; \
        and RT0, RMASK, a, lsr#(24 - 2); \
        eor RY, RY, RT3; \
        ldr RT1, [CTXs0, RT1]; \
        eor RX, RX, RT2; \
        ldr RT0, [CTXs3, RT0]; \
        \
        eor RY, RY, RT1; \
        ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
        eor RX, RX, RT0; \
        ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
        \
        add RT0, RX, RY, lsl #1; \
        add RX, RX, RY; \
        add RT0, RT0, RT1; \
        add RX, RX, RT2; \
        eor rd, RT0, rd; \
        eor rc, RX, rc, ror #31;
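
/*
 * The decrypt round uses the same F function but inverts the XOR/rotate
 * pairing of the two halves (C sketch, same conventions as the encryption
 * sketch above, x = g(a), y = g(rol32(b, 8))):
 *
 *   *d = ror32(*d ^ (x + 2 * y + k[2 * n + 1]), 1);
 *   *c = rol32(*c, 1) ^ (x + y + k[2 * n]);
 *
 * As in encryption, the trailing ROR-by-1 (here on d) is deferred to the
 * following round via ror_b/adj_b rather than done immediately.
 */
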
#define first_encrypt_cycle(nc) \
        encrypt_round(RA, RB, RC, RD, (nc) * 2, dummy, 0); \
        encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);

#define encrypt_cycle(nc) \
        encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
        encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);

#define last_encrypt_cycle(nc) \
        encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
        encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
        ror1(RA);

#define first_decrypt_cycle(nc) \
        decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, dummy, 0); \
        decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);

#define decrypt_cycle(nc) \
        decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
        decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);

#define last_decrypt_cycle(nc) \
        decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
        decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
        ror1(RD);
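
/*
 * Each cycle macro above expands to two rounds, so eight cycles give the
 * sixteen Twofish rounds.  Roughly, in C (round_c as sketched above; the
 * two halves swap roles between the half-cycles):
 *
 *   for (n = 0; n < 8; n++) {              // encryption; decryption runs 7..0
 *     round_c(s0, s1, s2, s3, k, 2 * n,     a, b, &c, &d);
 *     round_c(s0, s1, s2, s3, k, 2 * n + 1, c, d, &a, &b);
 *   }
 *
 * The "first" and "last" variants only differ in bookkeeping for the
 * deferred one-bit rotation: the very first round has no rotation pending
 * (dummy, adj 0), and the last cycle settles the final pending rotation
 * with an explicit ror1 of the register that still owes it.
 */
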
.globl _gcry_twofish_arm_encrypt_block
.type   _gcry_twofish_arm_encrypt_block,%function;

_gcry_twofish_arm_encrypt_block:
        /* input: %r0: ctx, %r1: dst, %r2: src */
        push {%r1, %r4-%r11, %ip, %lr};

        add RY, CTXs0, #w;

        ldr_input_le(%r2, RA, RB, RC, RD, RT0);

        /* Input whitening */
        ldm RY, {RT0, RT1, RT2, RT3};
        add CTXs3, CTXs0, #(s3 - s0);
        add CTXs1, CTXs0, #(s1 - s0);
        mov RMASK, #(0xff << 2);
        eor RA, RA, RT0;
        eor RB, RB, RT1;
        eor RC, RC, RT2;
        eor RD, RD, RT3;

        first_encrypt_cycle(0);
        encrypt_cycle(1);
        encrypt_cycle(2);
        encrypt_cycle(3);
        encrypt_cycle(4);
        encrypt_cycle(5);
        encrypt_cycle(6);
        last_encrypt_cycle(7);

        add RY, CTXs3, #(w + 4*4 - s3);
        pop {%r1}; /* dst */

        /* Output whitening */
        ldm RY, {RT0, RT1, RT2, RT3};
        eor RC, RC, RT0;
        eor RD, RD, RT1;
        eor RA, RA, RT2;
        eor RB, RB, RT3;

        str_output_le(%r1, RC, RD, RA, RB, RT0, RT1);

        pop {%r4-%r11, %ip, %pc};
.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;
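
/*
 * Block-level view of the routine above (C-style sketch, load_le32 and
 * store_le32 as sketched earlier, w[] being the whitening subkeys):
 *
 *   a ^= w[0]; b ^= w[1]; c ^= w[2]; d ^= w[3];     // input whitening
 *   // eight cycles == sixteen rounds
 *   c ^= w[4]; d ^= w[5]; a ^= w[6]; b ^= w[7];     // output whitening
 *   store_le32(out +  0, c); store_le32(out +  4, d);
 *   store_le32(out +  8, a); store_le32(out + 12, b);
 *
 * i.e. the final swap of the two halves is realised simply by writing the
 * registers out in (c, d, a, b) order, which is what str_output_le does.
 * The decryption routine below mirrors this: w[4..7] on input, w[0..3] on
 * output, and the cycles run in reverse order.
 */
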
.globl _gcry_twofish_arm_decrypt_block
.type   _gcry_twofish_arm_decrypt_block,%function;

_gcry_twofish_arm_decrypt_block:
        /* input: %r0: ctx, %r1: dst, %r2: src */
        push {%r1, %r4-%r11, %ip, %lr};

        add CTXs3, CTXs0, #(s3 - s0);

        ldr_input_le(%r2, RC, RD, RA, RB, RT0);

        add RY, CTXs3, #(w + 4*4 - s3);
        add CTXs3, CTXs0, #(s3 - s0);

        /* Input whitening */
        ldm RY, {RT0, RT1, RT2, RT3};
        add CTXs1, CTXs0, #(s1 - s0);
        mov RMASK, #(0xff << 2);
        eor RC, RC, RT0;
        eor RD, RD, RT1;
        eor RA, RA, RT2;
        eor RB, RB, RT3;

        first_decrypt_cycle(7);
        decrypt_cycle(6);
        decrypt_cycle(5);
        decrypt_cycle(4);
        decrypt_cycle(3);
        decrypt_cycle(2);
        decrypt_cycle(1);
        last_decrypt_cycle(0);

        add RY, CTXs0, #w;
        pop {%r1}; /* dst */

        /* Output whitening */
        ldm RY, {RT0, RT1, RT2, RT3};
        eor RA, RA, RT0;
        eor RB, RB, RT1;
        eor RC, RC, RT2;
        eor RD, RD, RT3;

        str_output_le(%r1, RA, RB, RC, RD, RT0, RT1);

        pop {%r4-%r11, %ip, %pc};
.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;
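
/*
 * For reference, the C side calls these entry points through prototypes
 * along these lines (an assumption for illustration; the authoritative
 * declarations live in libgcrypt's twofish.c):
 *
 *   void _gcry_twofish_arm_encrypt_block(const void *ctx,
 *                                        unsigned char *out,
 *                                        const unsigned char *in);
 *   void _gcry_twofish_arm_decrypt_block(const void *ctx,
 *                                        unsigned char *out,
 *                                        const unsigned char *in);
 *
 * Per the AAPCS, %r0 carries the context pointer, %r1 the 16-byte
 * destination block and %r2 the 16-byte source block.
 */
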
#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
#endif /*__ARMEL__*/