crypto: cast5/avx - fix storing of new IV in CBC encryption
arch/x86/crypto/cast5_avx_glue.c
/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

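/*
 * The AVX assembler routines process CAST5_PARALLEL_BLOCKS (16)
 * eight-byte blocks per call.  The 'xor' flag of the encryption entry
 * point selects between plain output and output that is additionally
 * XORed into dst; the CTR path below relies on the latter to apply the
 * keystream to the plaintext in a single pass.
 */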
asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
                                      const u8 *src, bool xor);
asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);

static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
                                      const u8 *src)
{
        __cast5_enc_blk_16way(ctx, dst, src, false);
}

static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
                                          const u8 *src)
{
        __cast5_enc_blk_16way(ctx, dst, src, true);
}

static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
                                      const u8 *src)
{
        cast5_dec_blk_16way(ctx, dst, src);
}

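/*
 * kernel_fpu_begin()/kernel_fpu_end() are costly, so the glue_helper
 * wrappers only claim the FPU once at least CAST5_PARALLEL_BLOCKS
 * blocks worth of data are pending, and then keep it claimed across
 * walk iterations until the matching cast5_fpu_end() call.
 */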
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
        return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
                              NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
        glue_fpu_end(fpu_enabled);
}

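/*
 * ECB has no chaining, so encryption and decryption share one walker.
 * While at least CAST5_PARALLEL_BLOCKS blocks remain, the 16-way AVX
 * path is taken; any tail blocks fall back to the one-block C routines
 * __cast5_encrypt()/__cast5_decrypt().  CRYPTO_TFM_REQ_MAY_SLEEP is
 * cleared because kernel_fpu_begin() disables preemption, so the
 * walker must not sleep while the FPU is claimed.
 */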
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
                     bool enc)
{
        bool fpu_enabled = false;
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes;
        int err;

        err = blkcipher_walk_virt(desc, walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk->nbytes)) {
                u8 *wsrc = walk->src.virt.addr;
                u8 *wdst = walk->dst.virt.addr;

                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

                /* Process multi-block batch */
                if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                        do {
                                if (enc)
                                        cast5_enc_blk_xway(ctx, wdst, wsrc);
                                else
                                        cast5_dec_blk_xway(ctx, wdst, wsrc);

                                wsrc += bsize * CAST5_PARALLEL_BLOCKS;
                                wdst += bsize * CAST5_PARALLEL_BLOCKS;
                                nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                        } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                        if (nbytes < bsize)
                                goto done;
                }

                /* Handle leftovers */
                do {
                        if (enc)
                                __cast5_encrypt(ctx, wdst, wsrc);
                        else
                                __cast5_decrypt(ctx, wdst, wsrc);

                        wsrc += bsize;
                        wdst += bsize;
                        nbytes -= bsize;
                } while (nbytes >= bsize);

done:
                err = blkcipher_walk_done(desc, walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, false);
}

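/*
 * CBC encryption is inherently serial (C_i = E_K(P_i ^ C_{i-1}), with
 * C_0 = IV), so the 16-way path cannot help and each block is handled
 * by the C implementation without the FPU.  Note that after the loop
 * the local 'iv' pointer still addresses the last ciphertext block
 * written, while 'dst' has already been advanced past it; storing the
 * chaining value through 'iv' rather than 'dst' is the fix referred to
 * in the commit subject above.
 */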
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 *iv = (u64 *)walk->iv;

        do {
                *dst = *src ^ *iv;
                __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
                iv = dst;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

        *(u64 *)walk->iv = *iv;
        return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);

        while ((nbytes = walk.nbytes)) {
                nbytes = __cbc_encrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        return err;
}

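/*
 * CBC decryption (P_i = D_K(C_i) ^ C_{i-1}) does parallelize, since all
 * ciphertext blocks are available up front.  The walk segment is
 * processed from the last block backwards so that decryption also works
 * in place (src == dst): before each 16-way call the ciphertext blocks
 * the call would overwrite are saved in ivs[] and XORed back in
 * afterwards.  last_iv preserves the final ciphertext block, which
 * becomes the chaining value for the next segment.
 */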
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
        u64 last_iv;
        int i;

        /* Start of the last block. */
        src += nbytes / bsize - 1;
        dst += nbytes / bsize - 1;

        last_iv = *src;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
                        src -= CAST5_PARALLEL_BLOCKS - 1;
                        dst -= CAST5_PARALLEL_BLOCKS - 1;

                        for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
                                ivs[i] = src[i];

                        cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);

                        for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
                                dst[i + 1] ^= ivs[i];

                        nbytes -= bsize;
                        if (nbytes < bsize)
                                goto done;

                        *dst ^= *(src - 1);
                        src -= 1;
                        dst -= 1;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                if (nbytes < bsize)
                        goto done;
        }

        /* Handle leftovers */
        for (;;) {
                __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

                nbytes -= bsize;
                if (nbytes < bsize)
                        break;

                *dst ^= *(src - 1);
                src -= 1;
                dst -= 1;
        }

done:
        *dst ^= *(u64 *)walk->iv;
        *(u64 *)walk->iv = last_iv;

        return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk.nbytes)) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __cbc_decrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

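/*
 * CTR turns the cipher into a stream cipher (C_i = P_i ^ E_K(ctr + i)),
 * so a trailing partial block needs no padding: the counter block is
 * encrypted into a keystream buffer and only nbytes of it are XORed
 * with the remaining input.
 */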
static void ctr_crypt_final(struct blkcipher_desc *desc,
                            struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        u8 *ctrblk = walk->iv;
        u8 keystream[CAST5_BLOCK_SIZE];
        u8 *src = walk->src.virt.addr;
        u8 *dst = walk->dst.virt.addr;
        unsigned int nbytes = walk->nbytes;

        __cast5_encrypt(ctx, keystream, ctrblk);
        crypto_xor(keystream, src, nbytes);
        memcpy(dst, keystream, nbytes);

        crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

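/*
 * With CAST5's 64-bit block the whole IV acts as one big-endian
 * counter.  It is kept in host byte order in 'ctrblk' and converted
 * per block; on the 16-way path the plaintext is first copied to dst
 * (when not already in place) and the xor variant of the assembler
 * routine then encrypts the counter blocks and XORs the result into
 * dst, producing the ciphertext in a single pass.
 */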
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
                                struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
        __be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
        int i;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        /* create ctrblks for parallel encrypt */
                        for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
                                if (dst != src)
                                        dst[i] = src[i];

                                ctrblocks[i] = cpu_to_be64(ctrblk++);
                        }

                        cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
                                               (u8 *)ctrblocks);

                        src += CAST5_PARALLEL_BLOCKS;
                        dst += CAST5_PARALLEL_BLOCKS;
                        nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                if (nbytes < bsize)
                        goto done;
        }

        /* Handle leftovers */
        do {
                if (dst != src)
                        *dst = *src;

                ctrblocks[0] = cpu_to_be64(ctrblk++);

                __cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
                *dst ^= ctrblocks[0];

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

done:
        *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
        return nbytes;
}

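/*
 * The walker is asked for at least CAST5_BLOCK_SIZE bytes per step, so
 * only the very last step can hold a partial block; that tail is then
 * finished without the FPU in ctr_crypt_final().
 */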
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                     struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __ctr_crypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);

        if (walk.nbytes) {
                ctr_crypt_final(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, 0);
        }

        return err;
}

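/*
 * The first three entries are the internal synchronous implementations
 * (priority 0, "__" prefix); they must not be used directly because
 * they touch the FPU.  The ecb/cbc/ctr(cast5) entries below wrap them
 * through the ablk helper, which defers to a cryptd thread whenever
 * the FPU is not usable in the caller's context.  cbc(cast5) can point
 * .encrypt at __ablk_encrypt, skipping the cryptd detour, because
 * __cbc_encrypt() never uses the FPU; ctr(cast5) reuses ablk_encrypt
 * for .decrypt since CTR encryption and decryption are one operation.
 */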
static struct crypto_alg cast5_algs[6] = { {
        .cra_name               = "__ecb-cast5-avx",
        .cra_driver_name        = "__driver-ecb-cast5-avx",
        .cra_priority           = 0,
        .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
        .cra_blocksize          = CAST5_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct cast5_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_blkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .setkey         = cast5_setkey,
                        .encrypt        = ecb_encrypt,
                        .decrypt        = ecb_decrypt,
                },
        },
}, {
        .cra_name               = "__cbc-cast5-avx",
        .cra_driver_name        = "__driver-cbc-cast5-avx",
        .cra_priority           = 0,
        .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
        .cra_blocksize          = CAST5_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct cast5_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_blkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .setkey         = cast5_setkey,
                        .encrypt        = cbc_encrypt,
                        .decrypt        = cbc_decrypt,
                },
        },
}, {
        .cra_name               = "__ctr-cast5-avx",
        .cra_driver_name        = "__driver-ctr-cast5-avx",
        .cra_priority           = 0,
        .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
        .cra_blocksize          = 1,
        .cra_ctxsize            = sizeof(struct cast5_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_blkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .ivsize         = CAST5_BLOCK_SIZE,
                        .setkey         = cast5_setkey,
                        .encrypt        = ctr_crypt,
                        .decrypt        = ctr_crypt,
                },
        },
}, {
        .cra_name               = "ecb(cast5)",
        .cra_driver_name        = "ecb-cast5-avx",
        .cra_priority           = 200,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize          = CAST5_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct async_helper_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_init               = ablk_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = ablk_encrypt,
                        .decrypt        = ablk_decrypt,
                },
        },
}, {
        .cra_name               = "cbc(cast5)",
        .cra_driver_name        = "cbc-cast5-avx",
        .cra_priority           = 200,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize          = CAST5_BLOCK_SIZE,
        .cra_ctxsize            = sizeof(struct async_helper_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_init               = ablk_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .ivsize         = CAST5_BLOCK_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = __ablk_encrypt,
                        .decrypt        = ablk_decrypt,
                },
        },
}, {
        .cra_name               = "ctr(cast5)",
        .cra_driver_name        = "ctr-cast5-avx",
        .cra_priority           = 200,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize          = 1,
        .cra_ctxsize            = sizeof(struct async_helper_ctx),
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
        .cra_init               = ablk_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize    = CAST5_MIN_KEY_SIZE,
                        .max_keysize    = CAST5_MAX_KEY_SIZE,
                        .ivsize         = CAST5_BLOCK_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = ablk_encrypt,
                        .decrypt        = ablk_encrypt,
                        .geniv          = "chainiv",
                },
        },
} };

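/*
 * Registration is gated on the CPU advertising AVX and OSXSAVE and on
 * the OS having enabled SSE and YMM state saving in XCR0 (read via
 * xgetbv()); without those, the assembler routines would fault with an
 * undefined-instruction exception at runtime.
 */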
static int __init cast5_init(void)
{
        u64 xcr0;

        if (!cpu_has_avx || !cpu_has_osxsave) {
                pr_info("AVX instructions are not detected.\n");
                return -ENODEV;
        }

        xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
        if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
                pr_info("AVX detected but unusable.\n");
                return -ENODEV;
        }

        return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
        crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("cast5");