crypto: powerpc/aes - key handling
authorMarkus Stockhausen <stockhausen@collogia.de>
Sun, 22 Feb 2015 08:59:54 +0000 (09:59 +0100)
committerHerbert Xu <herbert@gondor.apana.org.au>
Sun, 1 Mar 2015 10:02:28 +0000 (23:02 +1300)
Key generation for big endian core routines.

Signed-off-by: Markus Stockhausen <stockhausen@collogia.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/powerpc/crypto/aes-spe-keys.S [new file with mode: 0644]

diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S
new file mode 100644 (file)
index 0000000..be8090f
--- /dev/null
@@ -0,0 +1,283 @@
+/*
+ * Key handling functions for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_KEY(d, s, off) \
+       lwz             d,off(s);
+#else
+#define LOAD_KEY(d, s, off) \
+       li              r0,off; \
+       lwbrx           d,s,r0;
+#endif
+
+#define INITIALIZE_KEY \
+       stwu            r1,-32(r1);     /* create stack frame           */ \
+       stw             r14,8(r1);      /* save registers               */ \
+       stw             r15,12(r1);                                        \
+       stw             r16,16(r1);
+
+#define FINALIZE_KEY \
+       lwz             r14,8(r1);      /* restore registers            */ \
+       lwz             r15,12(r1);                                        \
+       lwz             r16,16(r1);                                        \
+       xor             r5,r5,r5;       /* clear sensitive data         */ \
+       xor             r6,r6,r6;                                          \
+       xor             r7,r7,r7;                                          \
+       xor             r8,r8,r8;                                          \
+       xor             r9,r9,r9;                                          \
+       xor             r10,r10,r10;                                       \
+       xor             r11,r11,r11;                                       \
+       xor             r12,r12,r12;                                       \
+       addi            r1,r1,32;       /* cleanup stack                */
+
+#define LS_BOX(r, t1, t2) \
+       lis             t2,PPC_AES_4K_ENCTAB@h;                            \
+       ori             t2,t2,PPC_AES_4K_ENCTAB@l;                         \
+       rlwimi          t2,r,4,20,27;                                      \
+       lbz             t1,8(t2);                                          \
+       rlwimi          r,t1,0,24,31;                                      \
+       rlwimi          t2,r,28,20,27;                                     \
+       lbz             t1,8(t2);                                          \
+       rlwimi          r,t1,8,16,23;                                      \
+       rlwimi          t2,r,20,20,27;                                     \
+       lbz             t1,8(t2);                                          \
+       rlwimi          r,t1,16,8,15;                                      \
+       rlwimi          t2,r,12,20,27;                                     \
+       lbz             t1,8(t2);                                          \
+       rlwimi          r,t1,24,0,7;
+
+#define GF8_MUL(out, in, t1, t2) \
+       lis t1,0x8080;                  /* multiplication in GF8        */ \
+       ori t1,t1,0x8080;                                                  \
+       and t1,t1,in;                                                      \
+       srwi t1,t1,7;                                                      \
+       mulli t1,t1,0x1b;                                                  \
+       lis t2,0x7f7f;                                                     \
+       ori t2,t2,0x7f7f;                                                  \
+       and t2,t2,in;                                                      \
+       slwi t2,t2,1;                                                      \
+       xor out,t1,t2;
+
+/*
+ * ppc_expand_key_128(u32 *key_enc, const u8 *key)
+ *
+ * Expand 128 bit key into 176 bytes encryption key. It consists of
+ * key itself plus 10 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_128)
+       INITIALIZE_KEY
+       LOAD_KEY(r5,r4,0)
+       LOAD_KEY(r6,r4,4)
+       LOAD_KEY(r7,r4,8)
+       LOAD_KEY(r8,r4,12)
+       stw             r5,0(r3)        /* key[0..3] = input data       */
+       stw             r6,4(r3)
+       stw             r7,8(r3)
+       stw             r8,12(r3)
+       li              r16,10          /* 10 expansion rounds          */
+       lis             r0,0x0100       /* RCO(1)                       */
+ppc_expand_128_loop:
+       addi            r3,r3,16
+       mr              r14,r8          /* apply LS_BOX to 4th temp     */
+       rotlwi          r14,r14,8
+       LS_BOX(r14, r15, r4)
+       xor             r14,r14,r0
+       xor             r5,r5,r14       /* xor next 4 keys              */
+       xor             r6,r6,r5
+       xor             r7,r7,r6
+       xor             r8,r8,r7
+       stw             r5,0(r3)        /* store next 4 keys            */
+       stw             r6,4(r3)
+       stw             r7,8(r3)
+       stw             r8,12(r3)
+       GF8_MUL(r0, r0, r4, r14)        /* multiply RCO by 2 in GF      */
+       subi            r16,r16,1
+       cmpwi           r16,0
+       bt              eq,ppc_expand_128_end
+       b               ppc_expand_128_loop
+ppc_expand_128_end:
+       FINALIZE_KEY
+       blr
+
+/*
+ * ppc_expand_key_192(u32 *key_enc, const u8 *key)
+ *
+ * Expand 192 bit key into 208 bytes encryption key. It consists of key
+ * itself plus 12 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_192)
+       INITIALIZE_KEY
+       LOAD_KEY(r5,r4,0)
+       LOAD_KEY(r6,r4,4)
+       LOAD_KEY(r7,r4,8)
+       LOAD_KEY(r8,r4,12)
+       LOAD_KEY(r9,r4,16)
+       LOAD_KEY(r10,r4,20)
+       stw             r5,0(r3)
+       stw             r6,4(r3)
+       stw             r7,8(r3)
+       stw             r8,12(r3)
+       stw             r9,16(r3)
+       stw             r10,20(r3)
+       li              r16,8           /* 8 expansion rounds           */
+       lis             r0,0x0100       /* RCO(1)                       */
+ppc_expand_192_loop:
+       addi            r3,r3,24
+       mr              r14,r10         /* apply LS_BOX to 6th temp     */
+       rotlwi          r14,r14,8
+       LS_BOX(r14, r15, r4)
+       xor             r14,r14,r0
+       xor             r5,r5,r14       /* xor next 6 keys              */
+       xor             r6,r6,r5
+       xor             r7,r7,r6
+       xor             r8,r8,r7
+       xor             r9,r9,r8
+       xor             r10,r10,r9
+       stw             r5,0(r3)
+       stw             r6,4(r3)
+       stw             r7,8(r3)
+       stw             r8,12(r3)
+       subi            r16,r16,1
+       cmpwi           r16,0           /* last round early kick out    */
+       bt              eq,ppc_expand_192_end
+       stw             r9,16(r3)
+       stw             r10,20(r3)
+       GF8_MUL(r0, r0, r4, r14)        /* multiply RCO GF8             */
+       b               ppc_expand_192_loop
+ppc_expand_192_end:
+       FINALIZE_KEY
+       blr
+
+/*
+ * ppc_expand_key_256(u32 *key_enc, const u8 *key)
+ *
+ * Expand 256 bit key into 240 bytes encryption key. It consists of key
+ * itself plus 14 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_256)
+       INITIALIZE_KEY
+       LOAD_KEY(r5,r4,0)
+       LOAD_KEY(r6,r4,4)
+       LOAD_KEY(r7,r4,8)
+       LOAD_KEY(r8,r4,12)
+       LOAD_KEY(r9,r4,16)
+       LOAD_KEY(r10,r4,20)
+       LOAD_KEY(r11,r4,24)
+       LOAD_KEY(r12,r4,28)
+       stw             r5,0(r3)
+       stw             r6,4(r3)
+       stw             r7,8(r3)
+       stw             r8,12(r3)
+       stw             r9,16(r3)
+       stw             r10,20(r3)
+       stw             r11,24(r3)
+       stw             r12,28(r3)
+       li              r16,7           /* 7 expansion rounds           */
+       lis             r0,0x0100       /* RCO(1)                       */
+ppc_expand_256_loop:
+       addi            r3,r3,32
+       mr              r14,r12         /* apply LS_BOX to 8th temp     */
+       rotlwi          r14,r14,8
+       LS_BOX(r14, r15, r4)
+       xor             r14,r14,r0
+       xor             r5,r5,r14       /* xor 4 keys                   */
+       xor             r6,r6,r5
+       xor             r7,r7,r6
+       xor             r8,r8,r7
+       mr              r14,r8
+       LS_BOX(r14, r15, r4)            /* apply LS_BOX to 4th temp     */
+       xor             r9,r9,r14       /* xor 4 keys                   */
+       xor             r10,r10,r9
+       xor             r11,r11,r10
+       xor             r12,r12,r11
+       stw             r5,0(r3)
+       stw             r6,4(r3)
+       stw             r7,8(r3)
+       stw             r8,12(r3)
+       subi            r16,r16,1
+       cmpwi           r16,0           /* last round early kick out    */
+       bt              eq,ppc_expand_256_end
+       stw             r9,16(r3)
+       stw             r10,20(r3)
+       stw             r11,24(r3)
+       stw             r12,28(r3)
+       GF8_MUL(r0, r0, r4, r14)
+       b               ppc_expand_256_loop
+ppc_expand_256_end:
+       FINALIZE_KEY
+       blr
+
+/*
+ * ppc_generate_decrypt_key: derive decryption key from encryption key
+ * number of bytes to handle are calculated from length of key (16/24/32)
+ *
+ */
+_GLOBAL(ppc_generate_decrypt_key)
+       addi            r6,r5,24
+       slwi            r6,r6,2
+       lwzx            r7,r4,r6        /* first/last 4 words are same  */
+       stw             r7,0(r3)
+       lwz             r7,0(r4)
+       stwx            r7,r3,r6
+       addi            r6,r6,4
+       lwzx            r7,r4,r6
+       stw             r7,4(r3)
+       lwz             r7,4(r4)
+       stwx            r7,r3,r6
+       addi            r6,r6,4
+       lwzx            r7,r4,r6
+       stw             r7,8(r3)
+       lwz             r7,8(r4)
+       stwx            r7,r3,r6
+       addi            r6,r6,4
+       lwzx            r7,r4,r6
+       stw             r7,12(r3)
+       lwz             r7,12(r4)
+       stwx            r7,r3,r6
+       addi            r3,r3,16
+       add             r4,r4,r6
+       subi            r4,r4,28
+       addi            r5,r5,20
+       srwi            r5,r5,2
+ppc_generate_decrypt_block:
+       li      r6,4
+       mtctr   r6
+ppc_generate_decrypt_word:
+       lwz             r6,0(r4)
+       GF8_MUL(r7, r6, r0, r7)
+       GF8_MUL(r8, r7, r0, r8)
+       GF8_MUL(r9, r8, r0, r9)
+       xor             r10,r9,r6
+       xor             r11,r7,r8
+       xor             r11,r11,r9
+       xor             r12,r7,r10
+       rotrwi          r12,r12,24
+       xor             r11,r11,r12
+       xor             r12,r8,r10
+       rotrwi          r12,r12,16
+       xor             r11,r11,r12
+       rotrwi          r12,r10,8
+       xor             r11,r11,r12
+       stw             r11,0(r3)
+       addi            r3,r3,4
+       addi            r4,r4,4
+       bdnz            ppc_generate_decrypt_word
+       subi            r4,r4,32
+       subi            r5,r5,1
+       cmpwi           r5,0
+       bt              gt,ppc_generate_decrypt_block
+       blr