crypto: arm64/gcm - move authentication tag check to SIMD domain
author Ard Biesheuvel <ardb@kernel.org>
Tue, 10 Nov 2020 09:10:42 +0000 (10:10 +0100)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 20 Nov 2020 03:45:32 +0000 (14:45 +1100)
Instead of copying the calculated authentication tag to memory and
calling crypto_memneq() to verify it, use the vector bytewise compare
(cmeq) and minimum-across-vector (sminv) instructions to decide whether
the tag is valid. This is more efficient, and because the tag is only
transiently held in a NEON register, it is also safer: calculated tags
for failed decryptions should be withheld.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm64/crypto/ghash-ce-core.S
arch/arm64/crypto/ghash-ce-glue.c

index 6b958dc..7868330 100644 (file)
@@ -544,7 +544,22 @@ CPU_LE(    rev             w8, w8          )
        ext             XL.16b, XL.16b, XL.16b, #8
        rev64           XL.16b, XL.16b
        eor             XL.16b, XL.16b, KS0.16b
+
+       .if             \enc == 1
        st1             {XL.16b}, [x10]                 // store tag
+       .else
+       ldp             x11, x12, [sp, #40]             // load tag pointer and authsize
+       adr_l           x17, .Lpermute_table
+       ld1             {KS0.16b}, [x11]                // load supplied tag
+       add             x17, x17, x12
+       ld1             {KS1.16b}, [x17]                // load permute vector
+
+       cmeq            XL.16b, XL.16b, KS0.16b         // compare tags
+       mvn             XL.16b, XL.16b                  // -1 for fail, 0 for pass
+       tbl             XL.16b, {XL.16b}, KS1.16b       // keep authsize bytes only
+       sminv           b0, XL.16b                      // signed minimum across XL
+       smov            w0, v0.b[0]                     // return b0
+       .endif
 
 4:     ldp             x29, x30, [sp], #32
        ret
index 2427e2f..720cd3a 100644 (file)
@@ -55,10 +55,10 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
                                  u64 const h[][2], u64 dg[], u8 ctr[],
                                  u32 const rk[], int rounds, u8 tag[]);
-
-asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
-                                 u64 const h[][2], u64 dg[], u8 ctr[],
-                                 u32 const rk[], int rounds, u8 tag[]);
+asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
+                                u64 const h[][2], u64 dg[], u8 ctr[],
+                                u32 const rk[], int rounds, const u8 l[],
+                                const u8 tag[], u64 authsize);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -458,6 +458,7 @@ static int gcm_decrypt(struct aead_request *req)
        unsigned int authsize = crypto_aead_authsize(aead);
        int nrounds = num_rounds(&ctx->aes_key);
        struct skcipher_walk walk;
+       u8 otag[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u8 iv[AES_BLOCK_SIZE];
        u64 dg[2] = {};
@@ -474,9 +475,15 @@ static int gcm_decrypt(struct aead_request *req)
        memcpy(iv, req->iv, GCM_IV_SIZE);
        put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
+       scatterwalk_map_and_copy(otag, req->src,
+                                req->assoclen + req->cryptlen - authsize,
+                                authsize, 0);
+
        err = skcipher_walk_aead_decrypt(&walk, req, false);
 
        if (likely(crypto_simd_usable())) {
+               int ret;
+
                do {
                        const u8 *src = walk.src.virt.addr;
                        u8 *dst = walk.dst.virt.addr;
@@ -493,9 +500,10 @@ static int gcm_decrypt(struct aead_request *req)
                        }
 
                        kernel_neon_begin();
-                       pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
-                                         dg, iv, ctx->aes_key.key_enc, nrounds,
-                                         tag);
+                       ret = pmull_gcm_decrypt(nbytes, dst, src,
+                                               ctx->ghash_key.h,
+                                               dg, iv, ctx->aes_key.key_enc,
+                                               nrounds, tag, otag, authsize);
                        kernel_neon_end();
 
                        if (unlikely(!nbytes))
@@ -507,6 +515,11 @@ static int gcm_decrypt(struct aead_request *req)
 
                        err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
                } while (walk.nbytes);
+
+               if (err)
+                       return err;
+               if (ret)
+                       return -EBADMSG;
        } else {
                while (walk.nbytes >= AES_BLOCK_SIZE) {
                        int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -548,23 +561,20 @@ static int gcm_decrypt(struct aead_request *req)
                        err = skcipher_walk_done(&walk, 0);
                }
 
+               if (err)
+                       return err;
+
                put_unaligned_be64(dg[1], tag);
                put_unaligned_be64(dg[0], tag + 8);
                put_unaligned_be32(1, iv + GCM_IV_SIZE);
                aes_encrypt(&ctx->aes_key, iv, iv);
                crypto_xor(tag, iv, AES_BLOCK_SIZE);
-       }
-
-       if (err)
-               return err;
 
-       /* compare calculated auth tag with the stored one */
-       scatterwalk_map_and_copy(buf, req->src,
-                                req->assoclen + req->cryptlen - authsize,
-                                authsize, 0);
-
-       if (crypto_memneq(tag, buf, authsize))
-               return -EBADMSG;
+               if (crypto_memneq(tag, otag, authsize)) {
+                       memzero_explicit(tag, AES_BLOCK_SIZE);
+                       return -EBADMSG;
+               }
+       }
        return 0;
 }