crypto: aesni_intel - improve lrw and xts performance by utilizing parallel AES-NI...
authorJussi Kivilinna <jussi.kivilinna@mbnet.fi>
Sun, 22 Jul 2012 15:18:37 +0000 (18:18 +0300)
committerHerbert Xu <herbert@gondor.apana.org.au>
Mon, 20 Aug 2012 08:28:10 +0000 (16:28 +0800)
Use parallel LRW and XTS encryption facilities to better utilize AES-NI
hardware pipelines and gain extra performance.

Tcrypt benchmark results (async), old vs new ratios:

Intel Core i5-2450M CPU (fam: 6, model: 42, step: 7)

aes:128bit
        lrw:256bit      xts:256bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     0.99x   1.00x   1.22x   1.19x
64B     1.38x   1.50x   1.58x   1.61x
256B    2.04x   2.02x   2.27x   2.29x
1024B   2.56x   2.54x   2.89x   2.92x
8192B   2.85x   2.99x   3.40x   3.23x

aes:192bit
        lrw:320bit      xts:384bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     1.08x   1.08x   1.16x   1.17x
64B     1.48x   1.54x   1.59x   1.65x
256B    2.18x   2.17x   2.29x   2.28x
1024B   2.67x   2.67x   2.87x   3.05x
8192B   2.93x   2.84x   3.28x   3.33x

aes:256bit
        lrw:348bit      xts:512bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     1.07x   1.07x   1.18x   1.19x
64B     1.56x   1.56x   1.70x   1.71x
256B    2.22x   2.24x   2.46x   2.46x
1024B   2.76x   2.77x   3.13x   3.05x
8192B   2.99x   3.05x   3.40x   3.30x

Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Reviewed-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/aesni-intel_glue.c
crypto/Kconfig

index 648347a0577309309d21f1a5aa0492f904caa4a0..7c04d0da709b10a002ecd26db5bf0ec0b7b3bb6c 100644 (file)
@@ -28,6 +28,9 @@
 #include <crypto/aes.h>
 #include <crypto/cryptd.h>
 #include <crypto/ctr.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
 #include <asm/crypto/aes.h>
 #define HAS_CTR
 #endif
 
-#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
-#define HAS_LRW
-#endif
-
 #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
 #define HAS_PCBC
 #endif
 
-#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
-#define HAS_XTS
-#endif
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data {
 #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
 #define RFC4106_HASH_SUBKEY_SIZE 16
 
+struct aesni_lrw_ctx {
+       struct lrw_table_ctx lrw_table;
+       u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+};
+
+struct aesni_xts_ctx {
+       u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+       u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+};
+
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
                             unsigned int key_len);
 asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
 #endif
 #endif
 
-#ifdef HAS_LRW
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
-       return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))");
-}
-#endif
-
 #ifdef HAS_PCBC
 static int ablk_pcbc_init(struct crypto_tfm *tfm)
 {
@@ -412,12 +410,160 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm)
 }
 #endif
 
-#ifdef HAS_XTS
-static int ablk_xts_init(struct crypto_tfm *tfm)
+static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
 {
-       return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))");
+       aesni_ecb_enc(ctx, blks, blks, nbytes);
+}
+
+static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
+{
+       aesni_ecb_dec(ctx, blks, blks, nbytes);
+}
+
+static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+                           unsigned int keylen)
+{
+       struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+       int err;
+
+       err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
+                                keylen - AES_BLOCK_SIZE);
+       if (err)
+               return err;
+
+       return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
+}
+
+static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
+{
+       struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       lrw_free_table(&ctx->lrw_table);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                      struct scatterlist *src, unsigned int nbytes)
+{
+       struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       be128 buf[8];
+       struct lrw_crypt_req req = {
+               .tbuf = buf,
+               .tbuflen = sizeof(buf),
+
+               .table_ctx = &ctx->lrw_table,
+               .crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
+               .crypt_fn = lrw_xts_encrypt_callback,
+       };
+       int ret;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       kernel_fpu_begin();
+       ret = lrw_crypt(desc, dst, src, nbytes, &req);
+       kernel_fpu_end();
+
+       return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                      struct scatterlist *src, unsigned int nbytes)
+{
+       struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       be128 buf[8];
+       struct lrw_crypt_req req = {
+               .tbuf = buf,
+               .tbuflen = sizeof(buf),
+
+               .table_ctx = &ctx->lrw_table,
+               .crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
+               .crypt_fn = lrw_xts_decrypt_callback,
+       };
+       int ret;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       kernel_fpu_begin();
+       ret = lrw_crypt(desc, dst, src, nbytes, &req);
+       kernel_fpu_end();
+
+       return ret;
+}
+
+static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+                           unsigned int keylen)
+{
+       struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+       u32 *flags = &tfm->crt_flags;
+       int err;
+
+       /* key consists of keys of equal size concatenated, therefore
+        * the length must be even
+        */
+       if (keylen % 2) {
+               *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
+
+       /* first half of xts-key is for crypt */
+       err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
+       if (err)
+               return err;
+
+       /* second half of xts-key is for tweak */
+       return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
+                                 keylen / 2);
+}
+
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                      struct scatterlist *src, unsigned int nbytes)
+{
+       struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       be128 buf[8];
+       struct xts_crypt_req req = {
+               .tbuf = buf,
+               .tbuflen = sizeof(buf),
+
+               .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
+               .tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+               .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
+               .crypt_fn = lrw_xts_encrypt_callback,
+       };
+       int ret;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       kernel_fpu_begin();
+       ret = xts_crypt(desc, dst, src, nbytes, &req);
+       kernel_fpu_end();
+
+       return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                      struct scatterlist *src, unsigned int nbytes)
+{
+       struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       be128 buf[8];
+       struct xts_crypt_req req = {
+               .tbuf = buf,
+               .tbuflen = sizeof(buf),
+
+               .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
+               .tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+               .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
+               .crypt_fn = lrw_xts_decrypt_callback,
+       };
+       int ret;
+
+       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       kernel_fpu_begin();
+       ret = xts_crypt(desc, dst, src, nbytes, &req);
+       kernel_fpu_end();
+
+       return ret;
 }
-#endif
 
 #ifdef CONFIG_X86_64
 static int rfc4106_init(struct crypto_tfm *tfm)
@@ -1035,10 +1181,10 @@ static struct crypto_alg aesni_algs[] = { {
        },
 #endif
 #endif
-#ifdef HAS_LRW
+#ifdef HAS_PCBC
 }, {
-       .cra_name               = "lrw(aes)",
-       .cra_driver_name        = "lrw-aes-aesni",
+       .cra_name               = "pcbc(aes)",
+       .cra_driver_name        = "pcbc-aes-aesni",
        .cra_priority           = 400,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize          = AES_BLOCK_SIZE,
@@ -1046,12 +1192,12 @@ static struct crypto_alg aesni_algs[] = { {
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_lrw_init,
+       .cra_init               = ablk_pcbc_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+                       .min_keysize    = AES_MIN_KEY_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE,
                        .ivsize         = AES_BLOCK_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = ablk_encrypt,
@@ -1059,10 +1205,50 @@ static struct crypto_alg aesni_algs[] = { {
                },
        },
 #endif
-#ifdef HAS_PCBC
 }, {
-       .cra_name               = "pcbc(aes)",
-       .cra_driver_name        = "pcbc-aes-aesni",
+       .cra_name               = "__lrw-aes-aesni",
+       .cra_driver_name        = "__driver-lrw-aes-aesni",
+       .cra_priority           = 0,
+       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          = AES_BLOCK_SIZE,
+       .cra_ctxsize            = sizeof(struct aesni_lrw_ctx),
+       .cra_alignmask          = 0,
+       .cra_type               = &crypto_blkcipher_type,
+       .cra_module             = THIS_MODULE,
+       .cra_exit               = lrw_aesni_exit_tfm,
+       .cra_u = {
+               .blkcipher = {
+                       .min_keysize    = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+                       .ivsize         = AES_BLOCK_SIZE,
+                       .setkey         = lrw_aesni_setkey,
+                       .encrypt        = lrw_encrypt,
+                       .decrypt        = lrw_decrypt,
+               },
+       },
+}, {
+       .cra_name               = "__xts-aes-aesni",
+       .cra_driver_name        = "__driver-xts-aes-aesni",
+       .cra_priority           = 0,
+       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          = AES_BLOCK_SIZE,
+       .cra_ctxsize            = sizeof(struct aesni_xts_ctx),
+       .cra_alignmask          = 0,
+       .cra_type               = &crypto_blkcipher_type,
+       .cra_module             = THIS_MODULE,
+       .cra_u = {
+               .blkcipher = {
+                       .min_keysize    = 2 * AES_MIN_KEY_SIZE,
+                       .max_keysize    = 2 * AES_MAX_KEY_SIZE,
+                       .ivsize         = AES_BLOCK_SIZE,
+                       .setkey         = xts_aesni_setkey,
+                       .encrypt        = xts_encrypt,
+                       .decrypt        = xts_decrypt,
+               },
+       },
+}, {
+       .cra_name               = "lrw(aes)",
+       .cra_driver_name        = "lrw-aes-aesni",
        .cra_priority           = 400,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize          = AES_BLOCK_SIZE,
@@ -1070,20 +1256,18 @@ static struct crypto_alg aesni_algs[] = { {
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_pcbc_init,
+       .cra_init               = ablk_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE,
+                       .min_keysize    = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
                        .ivsize         = AES_BLOCK_SIZE,
                        .setkey         = ablk_set_key,
                        .encrypt        = ablk_encrypt,
                        .decrypt        = ablk_decrypt,
                },
        },
-#endif
-#ifdef HAS_XTS
 }, {
        .cra_name               = "xts(aes)",
        .cra_driver_name        = "xts-aes-aesni",
@@ -1094,7 +1278,7 @@ static struct crypto_alg aesni_algs[] = { {
        .cra_alignmask          = 0,
        .cra_type               = &crypto_ablkcipher_type,
        .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_xts_init,
+       .cra_init               = ablk_init,
        .cra_exit               = ablk_exit,
        .cra_u = {
                .ablkcipher = {
@@ -1106,7 +1290,6 @@ static struct crypto_alg aesni_algs[] = { {
                        .decrypt        = ablk_decrypt,
                },
        },
-#endif
 } };
 
 
index cbcc0e2eeda0d7f5c733c14521cd49654cda486d..213fb37be51feb9a109cec7bc3bba312001a9215 100644 (file)
@@ -564,6 +564,8 @@ config CRYPTO_AES_NI_INTEL
        select CRYPTO_CRYPTD
        select CRYPTO_ABLK_HELPER_X86
        select CRYPTO_ALGAPI
+       select CRYPTO_LRW
+       select CRYPTO_XTS
        help
          Use Intel AES-NI instructions for AES algorithm.